Merge branch 'main' into v1-webscraper

This commit is contained in:
Gergő Móricz 2024-08-23 17:21:54 +02:00
commit e7f267b6fe
42 changed files with 1250 additions and 284 deletions

View File

@ -22,6 +22,7 @@ env:
SUPABASE_SERVICE_TOKEN: ${{ secrets.SUPABASE_SERVICE_TOKEN }}
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
TEST_API_KEY: ${{ secrets.TEST_API_KEY }}
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
jobs:
deploy:
@ -30,8 +31,9 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy --remote-only -a firecrawl-scraper-js
- run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
working-directory: ./apps/api
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}

View File

@ -26,6 +26,7 @@ env:
PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
jobs:
pre-deploy-e2e-tests:
@ -211,11 +212,12 @@ jobs:
steps:
- uses: actions/checkout@v3
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy --remote-only -a firecrawl-scraper-js
- run: flyctl deploy --remote-only -a firecrawl-scraper-js --build-secret SENTRY_AUTH_TOKEN=$SENTRY_AUTH_TOKEN
working-directory: ./apps/api
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
BULL_AUTH_KEY: ${{ secrets.BULL_AUTH_KEY }}
SENTRY_AUTH_TOKEN: ${{ secrets.SENTRY_AUTH_TOKEN }}
build-and-publish-python-sdk:
name: Build and publish Python SDK

2
apps/api/.gitignore vendored
View File

@ -8,4 +8,4 @@ dump.rdb
/.next/
.rdb
.sentryclirc
.sentryclirc

View File

@ -12,8 +12,10 @@ RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --prod --frozen-l
FROM base AS build
RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --frozen-lockfile
RUN apt-get update -qq && apt-get install -y ca-certificates && update-ca-certificates
RUN pnpm install
RUN pnpm run build
RUN --mount=type=secret,id=SENTRY_AUTH_TOKEN \
bash -c 'export SENTRY_AUTH_TOKEN="$(cat /run/secrets/SENTRY_AUTH_TOKEN)"; if [ -z $SENTRY_AUTH_TOKEN ]; then pnpm run build:nosentry; else pnpm run build; fi'
# Install packages needed for deployment

View File

@ -9,7 +9,8 @@
"format": "prettier --write \"src/**/*.(js|ts)\"",
"flyio": "node dist/src/index.js",
"start:dev": "nodemon --exec ts-node src/index.ts",
"build": "tsc",
"build": "tsc && pnpm sentry:sourcemaps",
"build:nosentry": "tsc",
"test": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_noAuth/*'",
"test:local-no-auth": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='src/__tests__/e2e_withAuth/*'",
"test:full": "npx jest --detectOpenHandles --forceExit --openHandlesTimeout=120000 --watchAll=false --testPathIgnorePatterns='(src/__tests__/e2e_noAuth|src/__tests__/e2e_withAuth)'",
@ -19,8 +20,9 @@
"mongo-docker": "docker run -d -p 2717:27017 -v ./mongo-data:/data/db --name mongodb mongo:latest",
"mongo-docker-console": "docker exec -it mongodb mongosh",
"run-example": "npx ts-node src/example.ts",
"deploy:fly": "flyctl deploy",
"deploy:fly:staging": "fly deploy -c fly.staging.toml"
"deploy:fly": "flyctl deploy --build-secret SENTRY_AUTH_TOKEN=$(dotenv -p SENTRY_AUTH_TOKEN)",
"deploy:fly:staging": "fly deploy -c fly.staging.toml",
"sentry:sourcemaps": "sentry-cli sourcemaps inject --org caleb-peffer --project firecrawl-scraper-js ./dist && sentry-cli sourcemaps upload --org caleb-peffer --project firecrawl-scraper-js ./dist"
},
"author": "",
"license": "ISC",
@ -52,10 +54,12 @@
"@bull-board/express": "^5.20.5",
"@devil7softwares/pos": "^1.0.2",
"@dqbd/tiktoken": "^1.0.13",
"@hyperdx/node-opentelemetry": "^0.8.0",
"@hyperdx/node-opentelemetry": "^0.8.1",
"@logtail/node": "^0.4.12",
"@nangohq/node": "^0.40.8",
"@sentry/node": "^8.13.0",
"@sentry/cli": "^2.33.1",
"@sentry/node": "^8.26.0",
"@sentry/profiling-node": "^8.26.0",
"@supabase/supabase-js": "^2.44.2",
"@types/express-ws": "^3.0.4",
"@types/ws": "^8.5.12",
@ -72,6 +76,7 @@
"cron-parser": "^4.9.0",
"date-fns": "^3.6.0",
"dotenv": "^16.3.1",
"dotenv-cli": "^7.4.2",
"express-rate-limit": "^7.3.1",
"express-ws": "^5.0.2",
"form-data": "^4.0.0",
@ -120,4 +125,4 @@
"temp"
]
}
}
}

View File

@ -27,17 +27,23 @@ importers:
specifier: ^1.0.13
version: 1.0.15
'@hyperdx/node-opentelemetry':
specifier: ^0.8.0
version: 0.8.0
specifier: ^0.8.1
version: 0.8.1
'@logtail/node':
specifier: ^0.4.12
version: 0.4.21
'@nangohq/node':
specifier: ^0.40.8
version: 0.40.8
'@sentry/cli':
specifier: ^2.33.1
version: 2.33.1
'@sentry/node':
specifier: ^8.13.0
version: 8.13.0
specifier: ^8.26.0
version: 8.26.0
'@sentry/profiling-node':
specifier: ^8.26.0
version: 8.26.0
'@supabase/supabase-js':
specifier: ^2.44.2
version: 2.44.2
@ -86,6 +92,9 @@ importers:
dotenv:
specifier: ^16.3.1
version: 16.4.5
dotenv-cli:
specifier: ^7.4.2
version: 7.4.2
express-rate-limit:
specifier: ^7.3.1
version: 7.3.1(express@4.19.2)
@ -511,8 +520,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@hyperdx/node-opentelemetry@0.8.0':
resolution: {integrity: sha512-2z1jQqg2czctHOgo17WETUJOX2BJJ2jqg50R/z4o4ADRCS7Ynp4n3eVMLtsJHypQeDdrInUDE0VtVoXN5b+6hw==}
'@hyperdx/node-opentelemetry@0.8.1':
resolution: {integrity: sha512-wNw0yQf54j/9KXVWeEOu8G6C5FT5EFlrz4dcmscTkwCvo6fQOLRZa/NbGcqugt0LSFMc0/6/Q5RDWVqDpEn0LQ==}
hasBin: true
'@ioredis/commands@1.2.0':
@ -815,8 +824,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-connect@0.37.0':
resolution: {integrity: sha512-SeQktDIH5rNzjiEiazWiJAIXkmnLOnNV7wwHpahrqE0Ph+Z3heqMfxRtoMtbdJSIYLfcNZYO51AjxZ00IXufdw==}
'@opentelemetry/instrumentation-connect@0.38.0':
resolution: {integrity: sha512-2/nRnx3pjYEmdPIaBwtgtSviTKHWnDZN3R+TkRUnhIVrvBKVcq+I5B2rtd6mr6Fe9cHlZ9Ojcuh7pkNh/xdWWg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -845,8 +854,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-express@0.40.1':
resolution: {integrity: sha512-+RKMvVe2zw3kIXRup9c1jFu3T4d0fs5aKy015TpiMyoCKX1UMu3Z0lfgYtuyiSTANvg5hZnDbWmQmqSPj9VTvg==}
'@opentelemetry/instrumentation-express@0.41.1':
resolution: {integrity: sha512-uRx0V3LPGzjn2bxAnV8eUsDT82vT7NTwI0ezEuPMBOTOsnPpGhWdhcdNdhH80sM4TrWrOfXm9HGEdfWE3TRIww==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -857,8 +866,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-fastify@0.37.0':
resolution: {integrity: sha512-WRjwzNZgupSzbEYvo9s+QuHJRqZJjVdNxSEpGBwWK8RKLlHGwGVAu0gcc2gPamJWUJsGqPGvahAPWM18ZkWj6A==}
'@opentelemetry/instrumentation-fastify@0.38.0':
resolution: {integrity: sha512-HBVLpTSYpkQZ87/Df3N0gAw7VzYZV3n28THIBrJWfuqw3Or7UqdhnjeuMIPQ04BKk3aZc0cWn2naSQObbh5vXw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -869,6 +878,12 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-fs@0.14.0':
resolution: {integrity: sha512-pVc8P5AgliC1DphyyBUgsxXlm2XaPH4BpYvt7rAZDMIqUpRk8gs19SioABtKqqxvFzg5jPtgJfJsdxq0Y+maLw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-generic-pool@0.36.0':
resolution: {integrity: sha512-CExAEqJvK8jYxrhN8cl6EaGg57EGJi+qsSKouLC5lndXi68gZLOKbZIMZg4pF0kNfp/D4BFaGmA6Ap7d5WoPTw==}
engines: {node: '>=14'}
@ -881,8 +896,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-graphql@0.41.0':
resolution: {integrity: sha512-R/gXeljgIhaRDKquVkKYT5QHPnFouM8ooyePZEP0kqyaVAedtR1V7NfAUJbxfTG5fBQa5wdmLjvu63+tzRXZCA==}
'@opentelemetry/instrumentation-graphql@0.42.0':
resolution: {integrity: sha512-N8SOwoKL9KQSX7z3gOaw5UaTeVQcfDO1c21csVHnmnmGUoqsXbArK2B8VuwPWcv6/BC/i3io+xTo7QGRZ/z28Q==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -899,8 +914,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-hapi@0.39.0':
resolution: {integrity: sha512-ik2nA9Yj2s2ay+aNY+tJsKCsEx6Tsc2g/MK0iWBW5tibwrWKTy1pdVt5sB3kd5Gkimqj23UV5+FH2JFcQLeKug==}
'@opentelemetry/instrumentation-hapi@0.40.0':
resolution: {integrity: sha512-8U/w7Ifumtd2bSN1OLaSwAAFhb9FyqWUki3lMMB0ds+1+HdSxYBe9aspEJEgvxAqOkrQnVniAPTEGf1pGM7SOw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -923,8 +938,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-ioredis@0.41.0':
resolution: {integrity: sha512-rxiLloU8VyeJGm5j2fZS8ShVdB82n7VNP8wTwfUQqDwRfHCnkzGr+buKoxuhGD91gtwJ91RHkjHA1Eg6RqsUTg==}
'@opentelemetry/instrumentation-ioredis@0.42.0':
resolution: {integrity: sha512-P11H168EKvBB9TUSasNDOGJCSkpT44XgoM6d3gRIWAa9ghLpYhl0uRkS8//MqPzcJVHr3h3RmfXIpiYLjyIZTw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -941,8 +956,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-koa@0.41.0':
resolution: {integrity: sha512-mbPnDt7ELvpM2S0vixYUsde7122lgegLOJQxx8iJQbB8YHal/xnTh9v7IfArSVzIDo+E+080hxZyUZD4boOWkw==}
'@opentelemetry/instrumentation-koa@0.42.0':
resolution: {integrity: sha512-H1BEmnMhho8o8HuNRq5zEI4+SIHDIglNB7BPKohZyWG4fWNuR7yM4GTlR01Syq21vODAS7z5omblScJD/eZdKw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -965,8 +980,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-mongodb@0.45.0':
resolution: {integrity: sha512-xnZP9+ayeB1JJyNE9cIiwhOJTzNEsRhXVdLgfzmrs48Chhhk026mQdM5CITfyXSCfN73FGAIB8d91+pflJEfWQ==}
'@opentelemetry/instrumentation-mongodb@0.46.0':
resolution: {integrity: sha512-VF/MicZ5UOBiXrqBslzwxhN7TVqzu1/LN/QDpkskqM0Zm0aZ4CVRbUygL8d7lrjLn15x5kGIe8VsSphMfPJzlA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -977,8 +992,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-mongoose@0.39.0':
resolution: {integrity: sha512-J1r66A7zJklPPhMtrFOO7/Ud2p0Pv5u8+r23Cd1JUH6fYPmftNJVsLp2urAt6PHK4jVqpP/YegN8wzjJ2mZNPQ==}
'@opentelemetry/instrumentation-mongoose@0.40.0':
resolution: {integrity: sha512-niRi5ZUnkgzRhIGMOozTyoZIvJKNJyhijQI4nF4iFSb+FUx2v5fngfR+8XLmdQAO7xmsD8E5vEGdDVYVtKbZew==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -989,8 +1004,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-mysql2@0.39.0':
resolution: {integrity: sha512-Iypuq2z6TCfriAXCIZjRq8GTFCKhQv5SpXbmI+e60rYdXw8NHtMH4NXcGF0eKTuoCsC59IYSTUvDQYDKReaszA==}
'@opentelemetry/instrumentation-mysql2@0.40.0':
resolution: {integrity: sha512-0xfS1xcqUmY7WE1uWjlmI67Xg3QsSUlNT+AcXHeA4BDUPwZtWqF4ezIwLgpVZfHOnkAEheqGfNSWd1PIu3Wnfg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1001,8 +1016,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-mysql@0.39.0':
resolution: {integrity: sha512-8snHPh83rhrDf31v9Kq0Nf+ts8hdr7NguuszRqZomZBHgE0+UyXZSkXHAAFZoBPPRMGyM68uaFE5hVtFl+wOcA==}
'@opentelemetry/instrumentation-mysql@0.40.0':
resolution: {integrity: sha512-d7ja8yizsOCNMYIJt5PH/fKZXjb/mS48zLROO4BzZTtDfhNCl2UM/9VIomP2qkGIFVouSJrGr/T00EzY7bPtKA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1013,8 +1028,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-nestjs-core@0.38.0':
resolution: {integrity: sha512-M381Df1dM8aqihZz2yK+ugvMFK5vlHG/835dc67Sx2hH4pQEQYDA2PpFPTgc9AYYOydQaj7ClFQunESimjXDgg==}
'@opentelemetry/instrumentation-nestjs-core@0.39.0':
resolution: {integrity: sha512-mewVhEXdikyvIZoMIUry8eb8l3HUjuQjSjVbmLVTt4NQi35tkpnHQrG9bTRBrl3403LoWZ2njMPJyg4l6HfKvA==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1031,8 +1046,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-pg@0.42.0':
resolution: {integrity: sha512-sjgcM8CswYy8zxHgXv4RAZ09DlYhQ+9TdlourUs63Df/ek5RrB1ZbjznqW7PB6c3TyJJmX6AVtPTjAsROovEjA==}
'@opentelemetry/instrumentation-pg@0.43.0':
resolution: {integrity: sha512-og23KLyoxdnAeFs1UWqzSonuCkePUzCX30keSYigIzJe/6WSYA8rnEI5lobcxPEzg+GcU06J7jzokuEHbjVJNw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1049,8 +1064,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation-redis-4@0.40.0':
resolution: {integrity: sha512-0ieQYJb6yl35kXA75LQUPhHtGjtQU9L85KlWa7d4ohBbk/iQKZ3X3CFl5jC5vNMq/GGPB3+w3IxNvALlHtrp7A==}
'@opentelemetry/instrumentation-redis-4@0.41.0':
resolution: {integrity: sha512-H7IfGTqW2reLXqput4yzAe8YpDC0fmVNal95GHMLOrS89W+qWUKIqxolSh63hJyfmwPSFwXASzj7wpSk8Az+Dg==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1103,8 +1118,8 @@ packages:
peerDependencies:
'@opentelemetry/api': ^1.3.0
'@opentelemetry/instrumentation@0.43.0':
resolution: {integrity: sha512-S1uHE+sxaepgp+t8lvIDuRgyjJWisAb733198kwQTUc9ZtYQ2V2gmyCtR1x21ePGVLoMiX/NWY7WA290hwkjJQ==}
'@opentelemetry/instrumentation@0.46.0':
resolution: {integrity: sha512-a9TijXZZbk0vI5TGLZl+0kxyFfrXHhX6Svtz7Pp2/VBlCSKrazuULEyoJQrOknJyFWNMEmbbJgOciHCCpQcisw==}
engines: {node: '>=14'}
peerDependencies:
'@opentelemetry/api': ^1.3.0
@ -1282,8 +1297,8 @@ packages:
resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
engines: {node: '>=14'}
'@prisma/instrumentation@5.16.0':
resolution: {integrity: sha512-MVzNRW2ikWvVNnMIEgQMcwWxpFD+XF2U2h0Qz7MjutRqJxrhWexWV2aSi2OXRaU8UL5wzWw7pnjdKUzYhWauLg==}
'@prisma/instrumentation@5.17.0':
resolution: {integrity: sha512-c1Sle4ji8aasMcYfBBHFM56We4ljfenVtRmS8aY06BllS7SoU6SmJBwG7vil+GHiR0Yrh+t9iBwt4AY0Jr4KNQ==}
'@protobufjs/aspromise@1.1.2':
resolution: {integrity: sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ==}
@ -1356,16 +1371,66 @@ packages:
'@selderee/plugin-htmlparser2@0.11.0':
resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==}
'@sentry/cli-darwin@2.33.1':
resolution: {integrity: sha512-+4/VIx/E1L2hChj5nGf5MHyEPHUNHJ/HoG5RY+B+vyEutGily1c1+DM2bum7RbD0xs6wKLIyup5F02guzSzG8A==}
engines: {node: '>=10'}
os: [darwin]
'@sentry/cli-linux-arm64@2.33.1':
resolution: {integrity: sha512-DbGV56PRKOLsAZJX27Jt2uZ11QfQEMmWB4cIvxkKcFVE+LJP4MVA+MGGRUL6p+Bs1R9ZUuGbpKGtj0JiG6CoXw==}
engines: {node: '>=10'}
cpu: [arm64]
os: [linux, freebsd]
'@sentry/cli-linux-arm@2.33.1':
resolution: {integrity: sha512-zbxEvQju+tgNvzTOt635le4kS/Fbm2XC2RtYbCTs034Vb8xjrAxLnK0z1bQnStUV8BkeBHtsNVrG+NSQDym2wg==}
engines: {node: '>=10'}
cpu: [arm]
os: [linux, freebsd]
'@sentry/cli-linux-i686@2.33.1':
resolution: {integrity: sha512-g2LS4oPXkPWOfKWukKzYp4FnXVRRSwBxhuQ9eSw2peeb58ZIObr4YKGOA/8HJRGkooBJIKGaAR2mH2Pk1TKaiA==}
engines: {node: '>=10'}
cpu: [x86, ia32]
os: [linux, freebsd]
'@sentry/cli-linux-x64@2.33.1':
resolution: {integrity: sha512-IV3dcYV/ZcvO+VGu9U6kuxSdbsV2kzxaBwWUQxtzxJ+cOa7J8Hn1t0koKGtU53JVZNBa06qJWIcqgl4/pCuKIg==}
engines: {node: '>=10'}
cpu: [x64]
os: [linux, freebsd]
'@sentry/cli-win32-i686@2.33.1':
resolution: {integrity: sha512-F7cJySvkpzIu7fnLKNHYwBzZYYwlhoDbAUnaFX0UZCN+5DNp/5LwTp37a5TWOsmCaHMZT4i9IO4SIsnNw16/zQ==}
engines: {node: '>=10'}
cpu: [x86, ia32]
os: [win32]
'@sentry/cli-win32-x64@2.33.1':
resolution: {integrity: sha512-8VyRoJqtb2uQ8/bFRKNuACYZt7r+Xx0k2wXRGTyH05lCjAiVIXn7DiS2BxHFty7M1QEWUCMNsb/UC/x/Cu2wuA==}
engines: {node: '>=10'}
cpu: [x64]
os: [win32]
'@sentry/cli@2.33.1':
resolution: {integrity: sha512-dUlZ4EFh98VFRPJ+f6OW3JEYQ7VvqGNMa0AMcmvk07ePNeK/GicAWmSQE4ZfJTTl80ul6HZw1kY01fGQOQlVRA==}
engines: {node: '>= 10'}
hasBin: true
'@sentry/core@8.13.0':
resolution: {integrity: sha512-N9Qg4ZGxZWp8eb2eUUHVVKgjBLtFIjS805nG92s6yJmkvOpKm6mLtcUaT/iDf3Hta6nG+xRkhbE3r+Z4cbXG8w==}
engines: {node: '>=14.18'}
'@sentry/node@8.13.0':
resolution: {integrity: sha512-OeZ7K90RhyxfwfreerIi4cszzHrPRRH36STJno2+p3sIGbG5VScOccqXzYEOAqHpByxnti4KQN34BLAT2BFOEA==}
'@sentry/core@8.26.0':
resolution: {integrity: sha512-g/tVmTZD4GNbLFf++hKJfBpcCAtduFEMLnbfa9iT/QEZjlmP+EzY+GsH9bafM5VsNe8DiOUp+kJKWtShzlVdBA==}
engines: {node: '>=14.18'}
'@sentry/opentelemetry@8.13.0':
resolution: {integrity: sha512-NYn/HNE/SxFXe8pfnxJknhrrRzYRMHNssCoi5M1CeR5G7F2BGxxVmaGsd8j0WyTCpUS4i97G4vhYtDGxHvWN6w==}
'@sentry/node@8.26.0':
resolution: {integrity: sha512-N9mNLzicnfGgsq6P10ckPdTzEFusjTC7gpqPopwq5eEMF7g798hH8CcE5o6FZ4iAAR3vWliAR/jgccdoMmJMpQ==}
engines: {node: '>=14.18'}
'@sentry/opentelemetry@8.26.0':
resolution: {integrity: sha512-HBDheM/+ysfIz8R1OH4bBIxdgD7ZbQkKLJAUXkdAbBcfbpK/CTtwcplbauF5wY7Q+GYvwL/ShuDwvXRfW+gFyQ==}
engines: {node: '>=14.18'}
peerDependencies:
'@opentelemetry/api': ^1.9.0
@ -1374,14 +1439,27 @@ packages:
'@opentelemetry/sdk-trace-base': ^1.25.1
'@opentelemetry/semantic-conventions': ^1.25.1
'@sentry/profiling-node@8.26.0':
resolution: {integrity: sha512-yGHFoqSKe5j9fDK9n5ntJxDyZnedwjCm6fAXwIlsLJOUBqn5g7l8V1XgBPlCJLZzOG0fbvGvSo4WyBfDoSD8vQ==}
engines: {node: '>=14.18'}
hasBin: true
'@sentry/types@8.13.0':
resolution: {integrity: sha512-r63s/H5gvQnQM9tTGBXz2xErUbxZALh4e2Lg/1aHj4zIvGLBjA2z5qWsh6TEZYbpmgAyGShLDr6+rWeUVf9yBQ==}
engines: {node: '>=14.18'}
'@sentry/types@8.26.0':
resolution: {integrity: sha512-zKmh6SWsJh630rpt7a9vP4Cm4m1C2gDTUqUiH565CajCL/4cePpNWYrNwalSqsOSL7B9OrczA1+n6a6XvND+ng==}
engines: {node: '>=14.18'}
'@sentry/utils@8.13.0':
resolution: {integrity: sha512-PxV0v9VbGWH9zP37P5w2msLUFDr287nYjoY2XVF+RSolyiTs1CQNI5ZMUO3o4MsSac/dpXxjyrZXQd72t/jRYA==}
engines: {node: '>=14.18'}
'@sentry/utils@8.26.0':
resolution: {integrity: sha512-xvlPU9Hd2BlyT+FhWHGNwnxWqdVRk2AHnDtVcW4Ma0Ri5EwS+uy4Jeik5UkSv8C5RVb9VlxFmS8LN3I1MPJsLw==}
engines: {node: '>=14.18'}
'@sinclair/typebox@0.27.8':
resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==}
@ -1734,6 +1812,10 @@ packages:
afinn-165@1.0.4:
resolution: {integrity: sha512-7+Wlx3BImrK0HiG6y3lU4xX7SpBPSSu8T9iguPMlaueRFxjbYwAQrp9lqZUuFikqKbd/en8lVREILvP2J80uJA==}
agent-base@6.0.2:
resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==}
engines: {node: '>= 6.0.0'}
agent-base@7.1.1:
resolution: {integrity: sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==}
engines: {node: '>= 14'}
@ -2303,6 +2385,14 @@ packages:
domutils@3.1.0:
resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==}
dotenv-cli@7.4.2:
resolution: {integrity: sha512-SbUj8l61zIbzyhIbg0FwPJq6+wjbzdn9oEtozQpZ6kW2ihCcapKVZj49oCT3oPM+mgQm+itgvUQcG5szxVrZTA==}
hasBin: true
dotenv-expand@10.0.0:
resolution: {integrity: sha512-GopVGCpVS1UKH75VKHGuQFqS1Gusej0z4FyQkPdwjil2gNIv+LNsqBlboOzpJFZKVT95GkCyWJbBSdFEFUWI2A==}
engines: {node: '>=12'}
dotenv@16.4.5:
resolution: {integrity: sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==}
engines: {node: '>=12'}
@ -2684,6 +2774,10 @@ packages:
engines: {node: '>=12'}
hasBin: true
https-proxy-agent@5.0.1:
resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==}
engines: {node: '>= 6'}
https-proxy-agent@7.0.4:
resolution: {integrity: sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==}
engines: {node: '>= 14'}
@ -2720,15 +2814,15 @@ packages:
resolution: {integrity: sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==}
engines: {node: '>=6'}
import-in-the-middle@1.4.2:
resolution: {integrity: sha512-9WOz1Yh/cvO/p69sxRmhyQwrIGGSp7EIdcb+fFNVi7CzQGQB8U1/1XrKVSbEd/GNOAeM0peJtmi7+qphe7NvAw==}
import-in-the-middle@1.11.0:
resolution: {integrity: sha512-5DimNQGoe0pLUHbR9qK84iWaWjjbsxiqXnw6Qz64+azRgleqv9k2kTt5fw7QsOpmaGYtuxxursnPPsnTKEx10Q==}
import-in-the-middle@1.7.1:
resolution: {integrity: sha512-1LrZPDtW+atAxH42S6288qyDFNQ2YCty+2mxEPRtfazH6Z5QwkaBSTS2ods7hnVJioF6rkRfNoA6A/MstpFXLg==}
import-in-the-middle@1.7.4:
resolution: {integrity: sha512-Lk+qzWmiQuRPPulGQeK5qq0v32k2bHnWrRPFgqyvhw7Kkov5L6MOLOIU3pcWeujc9W4q54Cp3Q2WV16eQkc7Bg==}
import-in-the-middle@1.8.1:
resolution: {integrity: sha512-yhRwoHtiLGvmSozNOALgjRPFI6uYsds60EoMqqnXyyv+JOIW/BrrLejuTGBt+bq0T5tLzOHrN0T7xYTm4Qt/ng==}
import-local@3.1.0:
resolution: {integrity: sha512-ASB07uLtnDs1o6EHjKpX34BKYDSqnFerfTOJL2HvMqF70LnxpjkzDB8J44oT9pu4AMPkQwf8jl6szgvNd2tRIg==}
engines: {node: '>=8'}
@ -3548,6 +3642,10 @@ packages:
resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==}
engines: {node: '>= 0.4.0'}
node-abi@3.67.0:
resolution: {integrity: sha512-bLn/fU/ALVBE9wj+p4Y21ZJWYFjUXLXPi/IewyLZkx3ApxKDNBWCKdReeKOtD8dWpOdDCeMyLh6ZewzcLsG2Nw==}
engines: {node: '>=10'}
node-abort-controller@3.1.1:
resolution: {integrity: sha512-AGK2yQKIjRuqnc6VkX2Xj5d+QW8xZ87pa1UK6yA6ouUyuxfHuMP6umE5QK7UmTeOAymo+Zx1Fxiuw9rVx8taHQ==}
@ -3646,9 +3744,11 @@ packages:
resolution: {integrity: sha512-ur5UIdyw5Y7yEj9wLzhqXiy6GZ3Mwx0yGI+5sMn2r0N0v3cKJvUmFH5yPP+WXh9e0xfyzyJX95D8l088DNFj7A==}
hasBin: true
opentelemetry-instrumentation-fetch-node@1.2.0:
resolution: {integrity: sha512-aiSt/4ubOTyb1N5C2ZbGrBvaJOXIZhZvpRPYuUVxQJe27wJZqf/o65iPrqgLcgfeOLaQ8cS2Q+762jrYvniTrA==}
opentelemetry-instrumentation-fetch-node@1.2.3:
resolution: {integrity: sha512-Qb11T7KvoCevMaSeuamcLsAD+pZnavkhDnlVL0kRozfhl42dKG5Q3anUklAFKJZjY3twLR+BnRa6DlwwkIE/+A==}
engines: {node: '>18.0.0'}
peerDependencies:
'@opentelemetry/api': ^1.6.0
option@0.2.4:
resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==}
@ -4971,7 +5071,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@hyperdx/node-opentelemetry@0.8.0':
'@hyperdx/node-opentelemetry@0.8.1':
dependencies:
'@hyperdx/instrumentation-exception': 0.1.0(@opentelemetry/api@1.9.0)
'@hyperdx/instrumentation-sentry-node': 0.1.0(@opentelemetry/api@1.9.0)
@ -4996,6 +5096,7 @@ snapshots:
lodash.isobject: 3.0.2
lodash.isplainobject: 4.0.6
lodash.isstring: 4.0.1
node-fetch: 2.7.0
open: 8.4.2
ora: 5.4.1
pino-abstract-transport: 1.2.0
@ -5518,7 +5619,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-connect@0.37.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-connect@0.38.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
@ -5561,7 +5662,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-express@0.40.1(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-express@0.41.1(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
@ -5579,7 +5680,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-fastify@0.37.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-fastify@0.38.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
@ -5596,6 +5697,14 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-fs@0.14.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-generic-pool@0.36.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
@ -5611,7 +5720,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-graphql@0.41.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-graphql@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5635,7 +5744,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-hapi@0.39.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-hapi@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
@ -5673,7 +5782,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-ioredis@0.41.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-ioredis@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5701,14 +5810,12 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-koa@0.41.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-koa@0.42.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
'@types/koa': 2.14.0
'@types/koa__router': 12.0.3
transitivePeerDependencies:
- supports-color
@ -5737,7 +5844,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-mongodb@0.45.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-mongodb@0.46.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5755,7 +5862,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-mongoose@0.39.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-mongoose@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
@ -5773,7 +5880,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-mysql2@0.39.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-mysql2@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5791,7 +5898,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-mysql@0.39.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-mysql@0.40.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5808,7 +5915,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-nestjs-core@0.38.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-nestjs-core@0.39.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5835,7 +5942,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-pg@0.42.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-pg@0.43.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5862,7 +5969,7 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation-redis-4@0.40.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation-redis-4@0.41.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -5937,11 +6044,11 @@ snapshots:
transitivePeerDependencies:
- supports-color
'@opentelemetry/instrumentation@0.43.0(@opentelemetry/api@1.9.0)':
'@opentelemetry/instrumentation@0.46.0(@opentelemetry/api@1.9.0)':
dependencies:
'@opentelemetry/api': 1.9.0
'@types/shimmer': 1.0.5
import-in-the-middle: 1.4.2
import-in-the-middle: 1.7.1
require-in-the-middle: 7.3.0
semver: 7.6.2
shimmer: 1.2.1
@ -5966,7 +6073,7 @@ snapshots:
'@opentelemetry/api': 1.9.0
'@opentelemetry/api-logs': 0.52.1
'@types/shimmer': 1.0.5
import-in-the-middle: 1.8.1
import-in-the-middle: 1.11.0
require-in-the-middle: 7.3.0
semver: 7.6.2
shimmer: 1.2.1
@ -6156,7 +6263,7 @@ snapshots:
'@pkgjs/parseargs@0.11.0':
optional: true
'@prisma/instrumentation@5.16.0':
'@prisma/instrumentation@5.17.0':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
@ -6239,62 +6346,126 @@ snapshots:
domhandler: 5.0.3
selderee: 0.11.0
'@sentry/cli-darwin@2.33.1':
optional: true
'@sentry/cli-linux-arm64@2.33.1':
optional: true
'@sentry/cli-linux-arm@2.33.1':
optional: true
'@sentry/cli-linux-i686@2.33.1':
optional: true
'@sentry/cli-linux-x64@2.33.1':
optional: true
'@sentry/cli-win32-i686@2.33.1':
optional: true
'@sentry/cli-win32-x64@2.33.1':
optional: true
'@sentry/cli@2.33.1':
dependencies:
https-proxy-agent: 5.0.1
node-fetch: 2.7.0
progress: 2.0.3
proxy-from-env: 1.1.0
which: 2.0.2
optionalDependencies:
'@sentry/cli-darwin': 2.33.1
'@sentry/cli-linux-arm': 2.33.1
'@sentry/cli-linux-arm64': 2.33.1
'@sentry/cli-linux-i686': 2.33.1
'@sentry/cli-linux-x64': 2.33.1
'@sentry/cli-win32-i686': 2.33.1
'@sentry/cli-win32-x64': 2.33.1
transitivePeerDependencies:
- encoding
- supports-color
'@sentry/core@8.13.0':
dependencies:
'@sentry/types': 8.13.0
'@sentry/utils': 8.13.0
'@sentry/node@8.13.0':
'@sentry/core@8.26.0':
dependencies:
'@sentry/types': 8.26.0
'@sentry/utils': 8.26.0
'@sentry/node@8.26.0':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/context-async-hooks': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-connect': 0.37.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-express': 0.40.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-fastify': 0.37.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-graphql': 0.41.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-hapi': 0.39.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-connect': 0.38.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-express': 0.41.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-fastify': 0.38.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-fs': 0.14.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-graphql': 0.42.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-hapi': 0.40.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-http': 0.52.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-ioredis': 0.41.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-koa': 0.41.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mongodb': 0.45.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mongoose': 0.39.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mysql': 0.39.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mysql2': 0.39.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-nestjs-core': 0.38.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-pg': 0.42.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-redis-4': 0.40.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-ioredis': 0.42.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-koa': 0.42.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mongodb': 0.46.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mongoose': 0.40.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mysql': 0.40.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-mysql2': 0.40.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-nestjs-core': 0.39.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-pg': 0.43.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation-redis-4': 0.41.0(@opentelemetry/api@1.9.0)
'@opentelemetry/resources': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
'@prisma/instrumentation': 5.16.0
'@sentry/core': 8.13.0
'@sentry/opentelemetry': 8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
'@sentry/types': 8.13.0
'@sentry/utils': 8.13.0
'@prisma/instrumentation': 5.17.0
'@sentry/core': 8.26.0
'@sentry/opentelemetry': 8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)
'@sentry/types': 8.26.0
'@sentry/utils': 8.26.0
import-in-the-middle: 1.11.0
optionalDependencies:
opentelemetry-instrumentation-fetch-node: 1.2.0
opentelemetry-instrumentation-fetch-node: 1.2.3(@opentelemetry/api@1.9.0)
transitivePeerDependencies:
- supports-color
'@sentry/opentelemetry@8.13.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
'@sentry/opentelemetry@8.26.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/instrumentation@0.52.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@1.25.1(@opentelemetry/api@1.9.0))(@opentelemetry/semantic-conventions@1.25.1)':
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/core': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation': 0.52.1(@opentelemetry/api@1.9.0)
'@opentelemetry/sdk-trace-base': 1.25.1(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
'@sentry/core': 8.13.0
'@sentry/types': 8.13.0
'@sentry/utils': 8.13.0
'@sentry/core': 8.26.0
'@sentry/types': 8.26.0
'@sentry/utils': 8.26.0
'@sentry/profiling-node@8.26.0':
dependencies:
'@sentry/core': 8.26.0
'@sentry/node': 8.26.0
'@sentry/types': 8.26.0
'@sentry/utils': 8.26.0
detect-libc: 2.0.3
node-abi: 3.67.0
transitivePeerDependencies:
- supports-color
'@sentry/types@8.13.0': {}
'@sentry/types@8.26.0': {}
'@sentry/utils@8.13.0':
dependencies:
'@sentry/types': 8.13.0
'@sentry/utils@8.26.0':
dependencies:
'@sentry/types': 8.26.0
'@sinclair/typebox@0.27.8': {}
'@sinonjs/commons@3.0.1':
@ -6681,6 +6852,12 @@ snapshots:
afinn-165@1.0.4: {}
agent-base@6.0.2:
dependencies:
debug: 4.3.5
transitivePeerDependencies:
- supports-color
agent-base@7.1.1:
dependencies:
debug: 4.3.5
@ -7232,8 +7409,7 @@ snapshots:
destroy@1.2.0: {}
detect-libc@2.0.3:
optional: true
detect-libc@2.0.3: {}
detect-newline@3.1.0: {}
@ -7270,6 +7446,15 @@ snapshots:
domelementtype: 2.3.0
domhandler: 5.0.3
dotenv-cli@7.4.2:
dependencies:
cross-spawn: 7.0.3
dotenv: 16.4.5
dotenv-expand: 10.0.0
minimist: 1.2.8
dotenv-expand@10.0.0: {}
dotenv@16.4.5: {}
duck@0.1.12:
@ -7708,6 +7893,13 @@ snapshots:
- debug
- supports-color
https-proxy-agent@5.0.1:
dependencies:
agent-base: 6.0.2
debug: 4.3.5
transitivePeerDependencies:
- supports-color
https-proxy-agent@7.0.4:
dependencies:
agent-base: 7.1.1
@ -7747,7 +7939,14 @@ snapshots:
parent-module: 1.0.1
resolve-from: 4.0.0
import-in-the-middle@1.4.2:
import-in-the-middle@1.11.0:
dependencies:
acorn: 8.12.0
acorn-import-attributes: 1.9.5(acorn@8.12.0)
cjs-module-lexer: 1.3.1
module-details-from-path: 1.0.3
import-in-the-middle@1.7.1:
dependencies:
acorn: 8.12.0
acorn-import-assertions: 1.9.0(acorn@8.12.0)
@ -7762,13 +7961,6 @@ snapshots:
cjs-module-lexer: 1.3.1
module-details-from-path: 1.0.3
import-in-the-middle@1.8.1:
dependencies:
acorn: 8.12.0
acorn-import-attributes: 1.9.5(acorn@8.12.0)
cjs-module-lexer: 1.3.1
module-details-from-path: 1.0.3
import-local@3.1.0:
dependencies:
pkg-dir: 4.2.0
@ -8647,6 +8839,10 @@ snapshots:
netmask@2.0.2: {}
node-abi@3.67.0:
dependencies:
semver: 7.6.2
node-abort-controller@3.1.1: {}
node-domexception@1.0.0: {}
@ -8749,10 +8945,10 @@ snapshots:
opener@1.5.2: {}
opentelemetry-instrumentation-fetch-node@1.2.0:
opentelemetry-instrumentation-fetch-node@1.2.3(@opentelemetry/api@1.9.0):
dependencies:
'@opentelemetry/api': 1.9.0
'@opentelemetry/instrumentation': 0.43.0(@opentelemetry/api@1.9.0)
'@opentelemetry/instrumentation': 0.46.0(@opentelemetry/api@1.9.0)
'@opentelemetry/semantic-conventions': 1.25.1
transitivePeerDependencies:
- supports-color

View File

@ -15,6 +15,7 @@ import { redlock } from "../../../src/services/redlock";
import { getValue } from "../../../src/services/redis";
import { setValue } from "../../../src/services/redis";
import { validate } from "uuid";
import * as Sentry from "@sentry/node";
function normalizedApiIsUuid(potentialUuid: string): boolean {
// Check if the string is a valid UUID
@ -34,6 +35,7 @@ function setTrace(team_id: string, api_key: string) {
api_key,
});
} catch (error) {
Sentry.captureException(error);
Logger.error(`Error setting trace attributes: ${error.message}`);
}
}
@ -49,6 +51,7 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
api_key: normalizedApi,
});
if (error) {
Sentry.captureException(error);
Logger.error(`RPC ERROR (get_key_and_price_id_2): ${error.message}`);
return {
success: false,
@ -58,7 +61,10 @@ async function getKeyAndPriceId(normalizedApi: string): Promise<{
};
}
if (!data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
if (error) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
Sentry.captureException(error);
}
// TODO: change this error code ?
return {
success: false,
@ -152,7 +158,8 @@ export async function supaAuthenticateUser(
);
}
} catch (error) {
Logger.error(`Error with auth function: ${error.message}`);
Sentry.captureException(error);
Logger.error(`Error with auth function: ${error}`);
// const {
// success,
// teamId: tId,
@ -268,7 +275,7 @@ export async function supaAuthenticateUser(
return {
success: false,
error: `Rate limit exceeded. Consumed points: ${rateLimiterRes.consumedPoints}, Remaining points: ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
error: `Rate limit exceeded. Consumed (req/min): ${rateLimiterRes.consumedPoints}, Remaining (req/min): ${rateLimiterRes.remainingPoints}. Upgrade your plan at https://firecrawl.dev/pricing for increased rate limits or please retry after ${secs}s, resets at ${retryDate}`,
status: 429,
};
}
@ -302,7 +309,10 @@ export async function supaAuthenticateUser(
.eq("key", normalizedApi);
if (error || !data || data.length === 0) {
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
if (error) {
Sentry.captureException(error);
Logger.warn(`Error fetching api key: ${error.message} or data is empty`);
}
return {
success: false,
error: "Unauthorized: Invalid token",

View File

@ -4,6 +4,7 @@ import { RateLimiterMode } from "../../../src/types";
import { supabase_service } from "../../../src/services/supabase";
import { Logger } from "../../../src/lib/logger";
import { getCrawl, saveCrawl } from "../../../src/lib/crawl-redis";
import * as Sentry from "@sentry/node";
export async function crawlCancelController(req: Request, res: Response) {
try {
@ -50,6 +51,7 @@ export async function crawlCancelController(req: Request, res: Response) {
status: "cancelled"
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -4,7 +4,29 @@ import { RateLimiterMode } from "../../../src/types";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
import { supabaseGetJobsById } from "../../../src/lib/supabase-jobs";
import * as Sentry from "@sentry/node";
export async function getJobs(ids: string[]) {
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobsById(ids);
supabaseData.forEach(x => {
const job = jobs.find(y => y.id === x.job_id);
if (job) {
job.returnvalue = x.docs;
}
})
}
jobs.forEach(job => {
job.returnvalue = Array.isArray(job.returnvalue) ? job.returnvalue[0] : job.returnvalue;
});
return jobs;
}
export async function crawlStatusController(req: Request, res: Response) {
try {
@ -28,19 +50,7 @@ export async function crawlStatusController(req: Request, res: Response) {
const jobIDs = await getCrawlJobs(req.params.jobId);
const jobs = (await Promise.all(jobIDs.map(async x => {
const job = await getScrapeQueue().getJob(x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(job.id);
if (supabaseData) {
job.returnvalue = supabaseData.docs;
}
}
return job;
}))).sort((a, b) => a.timestamp - b.timestamp);
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
@ -54,6 +64,7 @@ export async function crawlStatusController(req: Request, res: Response) {
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -13,6 +13,7 @@ import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, addCrawlJobs, crawlToCrawler, lockURL, lockURLs, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
export async function crawlController(req: Request, res: Response) {
try {
@ -38,16 +39,50 @@ export async function crawlController(req: Request, res: Response) {
}
}
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits" });
const crawlerOptions = {
...defaultCrawlerOptions,
...req.body.crawlerOptions,
};
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
if (Array.isArray(crawlerOptions.includes)) {
for (const x of crawlerOptions.includes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ error: e.message });
}
}
}
if (Array.isArray(crawlerOptions.excludes)) {
for (const x of crawlerOptions.excludes) {
try {
new RegExp(x);
} catch (e) {
return res.status(400).json({ error: e.message });
}
}
}
const limitCheck = req.body?.crawlerOptions?.limit ?? 1;
const { success: creditsCheckSuccess, message: creditsCheckMessage, remainingCredits } =
await checkTeamCredits(team_id, limitCheck);
if (!creditsCheckSuccess) {
return res.status(402).json({ error: "Insufficient credits. You may be requesting with a higher limit than the amount of credits you have left. If not, upgrade your plan at https://firecrawl.dev/pricing or contact us at hello@firecrawl.com" });
}
// TODO: need to do this to v1
crawlerOptions.limit = Math.min(remainingCredits, crawlerOptions.limit);
let url = req.body.url;
if (!url) {
return res.status(400).json({ error: "Url is required" });
}
if (typeof url !== "string") {
return res.status(400).json({ error: "URL must be a string" });
}
try {
url = checkAndUpdateURL(url).url;
} catch (e) {
@ -57,19 +92,12 @@ export async function crawlController(req: Request, res: Response) {
}
if (isUrlBlocked(url)) {
return res
.status(403)
.json({
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
return res.status(403).json({
error:
"Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it.",
});
}
const mode = req.body.mode ?? "crawl";
const crawlerOptions = { ...defaultCrawlerOptions, ...req.body.crawlerOptions };
const pageOptions = { ...defaultCrawlPageOptions, ...req.body.pageOptions };
// if (mode === "single_urls" && !url.includes(",")) { // NOTE: do we need this?
// try {
// const a = new WebScraperDataProvider();
@ -119,10 +147,12 @@ export async function crawlController(req: Request, res: Response) {
await saveCrawl(id, sc);
const sitemap = sc.crawlerOptions?.ignoreSitemap ? null : await crawler.tryGetSitemap();
const sitemap = sc.crawlerOptions?.ignoreSitemap
? null
: await crawler.tryGetSitemap();
if (sitemap !== null) {
const jobs = sitemap.map(x => {
if (sitemap !== null && sitemap.length > 0) {
const jobs = sitemap.map((x) => {
const url = x.url;
const uuid = uuidv4();
return {
@ -140,31 +170,48 @@ export async function crawlController(req: Request, res: Response) {
opts: {
jobId: uuid,
priority: 20,
}
},
};
})
});
await lockURLs(id, jobs.map(x => x.data.url));
await addCrawlJobs(id, jobs.map(x => x.opts.jobId));
await getScrapeQueue().addBulk(jobs);
await lockURLs(
id,
jobs.map((x) => x.data.url)
);
await addCrawlJobs(
id,
jobs.map((x) => x.opts.jobId)
);
if (Sentry.isInitialized()) {
for (const job of jobs) {
// add with sentry instrumentation
await addScrapeJob(job.data as any, {}, job.opts.jobId);
}
} else {
await getScrapeQueue().addBulk(jobs);
}
} else {
await lockURL(id, sc, url);
const job = await addScrapeJob({
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
}, {
priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
});
const job = await addScrapeJob(
{
url,
mode: "single_urls",
crawlerOptions: crawlerOptions,
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? defaultOrigin,
crawl_id: id,
},
{
priority: 15, // prioritize request 0 of crawl jobs same as scrape jobs
}
);
await addCrawlJob(id, job.id);
}
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -7,6 +7,7 @@ import { Logger } from "../../../src/lib/logger";
import { addCrawlJob, crawlToCrawler, lockURL, saveCrawl, StoredCrawl } from "../../../src/lib/crawl-redis";
import { addScrapeJob } from "../../../src/services/queue-jobs";
import { checkAndUpdateURL } from "../../../src/lib/validateUrl";
import * as Sentry from "@sentry/node";
export async function crawlPreviewController(req: Request, res: Response) {
try {
@ -129,6 +130,7 @@ export async function crawlPreviewController(req: Request, res: Response) {
res.json({ jobId: id });
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -9,9 +9,10 @@ import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist"; // Impo
import { numTokensFromString } from '../../lib/LLM-extraction/helpers';
import { defaultPageOptions, defaultExtractorOptions, defaultTimeout, defaultOrigin } from '../../lib/default-values';
import { addScrapeJob } from '../../services/queue-jobs';
import { scrapeQueueEvents } from '../../services/queue-service';
import { getScrapeQueue } from '../../services/queue-service';
import { v4 as uuidv4 } from "uuid";
import { Logger } from '../../lib/logger';
import * as Sentry from "@sentry/node";
export async function scrapeHelper(
jobId: string,
@ -48,18 +49,39 @@ export async function scrapeHelper(
}, {}, jobId);
let doc;
try {
doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]; //60 seconds timeout
} catch (e) {
if (e instanceof Error && e.message.startsWith("Job wait")) {
return {
success: false,
error: "Request timed out",
returnCode: 408,
const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
try {
doc = (await new Promise((resolve, reject) => {
const start = Date.now();
const int = setInterval(async () => {
if (Date.now() >= start + timeout) {
clearInterval(int);
reject(new Error("Job wait "));
} else if (await job.getState() === "completed") {
clearInterval(int);
resolve((await getScrapeQueue().getJob(job.id)).returnvalue);
}
}, 1000);
}))[0]
} catch (e) {
if (e instanceof Error && e.message.startsWith("Job wait")) {
span.setAttribute("timedOut", true);
return {
success: false,
error: "Request timed out",
returnCode: 408,
}
} else {
throw e;
}
} else {
throw e;
}
span.setAttribute("result", JSON.stringify(doc));
return null;
});
if (err !== null) {
return err;
}
await job.remove();
@ -112,26 +134,26 @@ export async function scrapeController(req: Request, res: Response) {
let timeout = req.body.timeout ?? defaultTimeout;
if (extractorOptions.mode.includes("llm-extraction")) {
if (typeof extractorOptions.extractionSchema !== "object" || extractorOptions.extractionSchema === null) {
return res.status(400).json({ error: "extractorOptions.extractionSchema must be an object if llm-extraction mode is specified" });
}
pageOptions.onlyMainContent = true;
timeout = req.body.timeout ?? 90000;
}
const checkCredits = async () => {
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
earlyReturn = true;
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
Logger.error(error);
// checkCredits
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } = await checkTeamCredits(team_id, 1);
if (!creditsCheckSuccess) {
earlyReturn = true;
return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
return res.status(402).json({ error: "Insufficient credits" });
}
};
await checkCredits();
} catch (error) {
Logger.error(error);
earlyReturn = true;
return res.status(500).json({ error: "Error checking team credits. Please contact hello@firecrawl.com for help." });
}
const jobId = uuidv4();
@ -198,6 +220,7 @@ export async function scrapeController(req: Request, res: Response) {
return res.status(result.returnCode).json(result);
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -9,7 +9,9 @@ import { search } from "../../search";
import { isUrlBlocked } from "../../scraper/WebScraper/utils/blocklist";
import { v4 as uuidv4 } from "uuid";
import { Logger } from "../../lib/logger";
import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
import { getScrapeQueue } from "../../services/queue-service";
import { addScrapeJob } from "../../services/queue-jobs";
import * as Sentry from "@sentry/node";
export async function searchHelper(
jobId: string,
@ -90,22 +92,44 @@ export async function searchHelper(
},
opts: {
jobId: uuid,
priority: 10,
priority: 20,
}
};
})
const jobs = await getScrapeQueue().addBulk(jobDatas);
const docs = (await Promise.all(jobs.map(x => x.waitUntilFinished(scrapeQueueEvents, 60000)))).map(x => x[0]);
let jobs = [];
if (Sentry.isInitialized()) {
for (const job of jobDatas) {
// add with sentry instrumentation
jobs.push(await addScrapeJob(job.data as any, {}, job.opts.jobId));
}
} else {
jobs = await getScrapeQueue().addBulk(jobDatas);
await getScrapeQueue().addBulk(jobs);
}
const docs = (await Promise.all(jobs.map(x => new Promise((resolve, reject) => {
const start = Date.now();
const int = setInterval(async () => {
if (Date.now() >= start + 60000) {
clearInterval(int);
reject(new Error("Job wait "));
} else if (await x.getState() === "completed") {
clearInterval(int);
resolve((await getScrapeQueue().getJob(x.id)).returnvalue);
}
}, 1000);
})))).map(x => x[0]);
if (docs.length === 0) {
return { success: true, error: "No search results found", returnCode: 200 };
}
await Promise.all(jobs.map(x => x.remove()));
// make sure doc.content is not empty
const filteredDocs = docs.filter(
(doc: { content?: string }) => doc.content && doc.content.trim().length > 0
(doc: { content?: string }) => doc && doc.content && doc.content.trim().length > 0
);
if (filteredDocs.length === 0) {
@ -151,6 +175,7 @@ export async function searchController(req: Request, res: Response) {
return res.status(402).json({ error: "Insufficient credits" });
}
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: "Internal server error" });
}
@ -181,6 +206,11 @@ export async function searchController(req: Request, res: Response) {
});
return res.status(result.returnCode).json(result);
} catch (error) {
if (error instanceof Error && error.message.startsWith("Job wait")) {
return res.status(408).json({ error: "Request timed out" });
}
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -1,8 +1,8 @@
import { Request, Response } from "express";
import { Logger } from "../../../src/lib/logger";
import { getCrawl, getCrawlJobs } from "../../../src/lib/crawl-redis";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { supabaseGetJobById } from "../../../src/lib/supabase-jobs";
import { getJobs } from "./crawl-status";
import * as Sentry from "@sentry/node";
export async function crawlJobStatusPreviewController(req: Request, res: Response) {
try {
@ -22,19 +22,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
// }
// }
const jobs = (await Promise.all(jobIDs.map(async x => {
const job = await getScrapeQueue().getJob(x);
if (process.env.USE_DB_AUTHENTICATION === "true") {
const supabaseData = await supabaseGetJobById(job.id);
if (supabaseData) {
job.returnvalue = supabaseData.docs;
}
}
return job;
}))).sort((a, b) => a.timestamp - b.timestamp);
const jobs = (await getJobs(jobIDs)).sort((a, b) => a.timestamp - b.timestamp);
const jobStatuses = await Promise.all(jobs.map(x => x.getState()));
const jobStatus = sc.cancelled ? "failed" : jobStatuses.every(x => x === "completed") ? "completed" : jobStatuses.some(x => x === "failed") ? "failed" : "active";
@ -48,6 +36,7 @@ export async function crawlJobStatusPreviewController(req: Request, res: Respons
partial_data: jobStatus === "completed" ? [] : data.filter(x => x !== null),
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}

View File

@ -1,7 +1,9 @@
import "dotenv/config";
import "./services/sentry"
import * as Sentry from "@sentry/node";
import express from "express";
import bodyParser from "body-parser";
import cors from "cors";
import "dotenv/config";
import { getScrapeQueue } from "./services/queue-service";
import { v0Router } from "./routes/v0";
import { initSDK } from "@hyperdx/node-opentelemetry";
@ -17,6 +19,8 @@ import { v1Router } from "./routes/v1";
import expressWs from "express-ws";
import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
const { createBullBoard } = require("@bull-board/api");
const { BullAdapter } = require("@bull-board/api/bullAdapter");
const { ExpressAdapter } = require("@bull-board/express");
@ -120,6 +124,7 @@ if (cluster.isMaster) {
waitingJobs,
});
} catch (error) {
Sentry.captureException(error);
Logger.error(error);
return res.status(500).json({ error: error.message });
}
@ -171,6 +176,7 @@ if (cluster.isMaster) {
}, timeout);
}
} catch (error) {
Sentry.captureException(error);
Logger.debug(error);
}
};
@ -183,6 +189,8 @@ if (cluster.isMaster) {
res.send({ isProduction: global.isProduction });
});
Sentry.setupExpressErrorHandler(app);
Logger.info(`Worker ${process.pid} started`);
}
@ -195,3 +203,5 @@ if (cluster.isMaster) {
// sq.on("resumed", j => ScrapeEvents.logJobEvent(j, "resumed"));
// sq.on("removed", j => ScrapeEvents.logJobEvent(j, "removed"));

View File

@ -46,7 +46,7 @@ export async function generateCompletions(
return completionResult;
} catch (error) {
Logger.error(`Error generating completions: ${error}`);
throw new Error(`Error generating completions: ${error.message}`);
throw error;
}
default:
throw new Error("Invalid client");

View File

@ -15,7 +15,7 @@ const defaultPrompt =
function prepareOpenAIDoc(
document: Document,
mode: "markdown" | "raw-html"
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] {
): [OpenAI.Chat.Completions.ChatCompletionContentPart[], number] | null {
let markdown = document.markdown;
@ -27,9 +27,10 @@ function prepareOpenAIDoc(
// Check if the markdown content exists in the document
if (!extractionTarget) {
throw new Error(
`${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
);
return null;
// throw new Error(
// `${mode} content is missing in the document. This is likely due to an error in the scraping process. Please try again or reach out to help@mendable.ai`
// );
}
@ -64,7 +65,16 @@ export async function generateOpenAICompletions({
mode: "markdown" | "raw-html";
}): Promise<Document> {
const openai = client as OpenAI;
const [content, numTokens] = prepareOpenAIDoc(document, mode);
const preparedDoc = prepareOpenAIDoc(document, mode);
if (preparedDoc === null) {
return {
...document,
warning: "LLM extraction was not performed since the document's content is empty or missing.",
};
}
const [content, numTokens] = preparedDoc;
const completion = await openai.chat.completions.create({
model,

View File

@ -1,6 +1,6 @@
export const defaultOrigin = "api";
export const defaultTimeout = 45000; // 45 seconds
export const defaultTimeout = 60000; // 60 seconds
export const defaultPageOptions = {
onlyMainContent: false,
@ -12,7 +12,8 @@ export const defaultPageOptions = {
};
export const defaultCrawlerOptions = {
allowBackwardCrawling: false
allowBackwardCrawling: false,
limit: 10000
}
export const defaultCrawlPageOptions = {

View File

@ -26,6 +26,9 @@ export type PageOptions = {
removeTags?: string | string[];
onlyIncludeTags?: string | string[];
includeLinks?: boolean;
useFastMode?: boolean; // beta
disableJSDom?: boolean; // beta
atsv?: boolean; // beta
};
export type ExtractorOptions = {
@ -68,6 +71,7 @@ export type WebScraperOptions = {
concurrentRequests?: number;
bullJobId?: string;
priority?: number;
teamId?: string;
};
export interface DocumentUrl {
@ -144,4 +148,5 @@ export interface FireEngineOptions{
blockMedia?: boolean;
blockAds?: boolean;
disableJsDom?: boolean;
atsv?: boolean; // beta
}

View File

@ -12,7 +12,6 @@ import { Document } from "../lib/entities";
import { supabase_service } from "../services/supabase";
import { Logger } from "../lib/logger";
import { ScrapeEvents } from "../lib/scrape-events";
import { getScrapeQueue } from "../services/queue-service";
export async function startWebScraperPipeline({
job,
@ -95,6 +94,7 @@ export async function runWebScraper({
crawlerOptions: crawlerOptions,
pageOptions: pageOptions,
priority,
teamId: team_id
});
}
const docs = (await provider.getDocuments(false, (progress: Progress) => {

View File

@ -53,8 +53,8 @@ export class WebCrawler {
this.jobId = jobId;
this.initialUrl = initialUrl;
this.baseUrl = new URL(initialUrl).origin;
this.includes = includes ?? [];
this.excludes = excludes ?? [];
this.includes = Array.isArray(includes) ? includes : [];
this.excludes = Array.isArray(excludes) ? excludes : [];
this.limit = limit;
this.robotsTxtUrl = `${this.baseUrl}/robots.txt`;
this.robots = robotsParser(this.robotsTxtUrl, "");
@ -69,7 +69,13 @@ export class WebCrawler {
public filterLinks(sitemapLinks: string[], limit: number, maxDepth: number): string[] {
return sitemapLinks
.filter((link) => {
const url = new URL(link.trim(), this.baseUrl);
let url: URL;
try {
url = new URL(link.trim(), this.baseUrl);
} catch (error) {
Logger.debug(`Error processing link: ${link} | Error: ${error.message}`);
return false;
}
const path = url.pathname;
const depth = getURLDepth(url.toString());
@ -102,7 +108,12 @@ export class WebCrawler {
// Normalize the initial URL and the link to account for www and non-www versions
const normalizedInitialUrl = new URL(this.initialUrl);
const normalizedLink = new URL(link);
let normalizedLink;
try {
normalizedLink = new URL(link);
} catch (_) {
return false;
}
const initialHostname = normalizedInitialUrl.hostname.replace(/^www\./, '');
const linkHostname = normalizedLink.hostname.replace(/^www\./, '');
@ -261,9 +272,18 @@ export class WebCrawler {
public filterURL(href: string, url: string): string | null {
let fullUrl = href;
if (!href.startsWith("http")) {
fullUrl = new URL(href, this.baseUrl).toString();
try {
fullUrl = new URL(href, this.baseUrl).toString();
} catch (_) {
return null;
}
}
let urlObj;
try {
urlObj = new URL(fullUrl);
} catch (_) {
return null;
}
const urlObj = new URL(fullUrl);
const path = urlObj.pathname;
if (this.isInternalLink(fullUrl)) { // INTERNAL LINKS

View File

@ -16,7 +16,6 @@ import {
replacePathsWithAbsolutePaths,
} from "./utils/replacePaths";
import { generateCompletions } from "../../lib/LLM-extraction";
import { getScrapeQueue } from "../../../src/services/queue-service";
import { fetchAndProcessDocx } from "./utils/docxProcessor";
import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
import { Logger } from "../../lib/logger";
@ -45,6 +44,7 @@ export class WebScraperDataProvider {
private allowBackwardCrawling: boolean = false;
private allowExternalContentLinks: boolean = false;
private priority?: number;
private teamId?: string;
authorize(): void {
throw new Error("Method not implemented.");
@ -75,6 +75,7 @@ export class WebScraperDataProvider {
this.extractorOptions,
existingHTML,
this.priority,
this.teamId,
);
processedUrls++;
if (inProgress) {
@ -613,6 +614,7 @@ export class WebScraperDataProvider {
this.allowExternalContentLinks =
options.crawlerOptions?.allowExternalContentLinks ?? false;
this.priority = options.priority;
this.teamId = options.teamId ?? null;
// make sure all urls start with https://
this.urls = this.urls.map((url) => {

View File

@ -5,6 +5,7 @@ import { generateRequestParams } from "../single_url";
import { fetchAndProcessPdf } from "../utils/pdfProcessor";
import { universalTimeout } from "../global";
import { Logger } from "../../../lib/logger";
import * as Sentry from "@sentry/node";
/**
* Scrapes a URL with Fire-Engine
@ -22,21 +23,23 @@ export async function scrapWithFireEngine({
waitFor = 0,
screenshot = false,
fullPageScreenshot = false,
pageOptions = { parsePDF: true },
pageOptions = { parsePDF: true, atsv: false, useFastMode: false, disableJsDom: false },
fireEngineOptions = {},
headers,
options,
priority,
teamId,
}: {
url: string;
waitFor?: number;
screenshot?: boolean;
fullPageScreenshot?: boolean;
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean };
pageOptions?: { scrollXPaths?: string[]; parsePDF?: boolean, atsv?: boolean, useFastMode?: boolean, disableJsDom?: boolean };
fireEngineOptions?: FireEngineOptions;
headers?: Record<string, string>;
options?: any;
priority?: number;
teamId?: string;
}): Promise<FireEngineResponse> {
const logParams = {
url,
@ -51,11 +54,11 @@ export async function scrapWithFireEngine({
try {
const reqParams = await generateRequestParams(url);
const waitParam = reqParams["params"]?.wait ?? waitFor;
const engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
const screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
const fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
const fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
let waitParam = reqParams["params"]?.wait ?? waitFor;
let engineParam = reqParams["params"]?.engine ?? reqParams["params"]?.fireEngineOptions?.engine ?? fireEngineOptions?.engine ?? "playwright";
let screenshotParam = reqParams["params"]?.screenshot ?? screenshot;
let fullPageScreenshotParam = reqParams["params"]?.fullPageScreenshot ?? fullPageScreenshot;
let fireEngineOptionsParam : FireEngineOptions = reqParams["params"]?.fireEngineOptions ?? fireEngineOptions;
let endpoint = "/scrape";
@ -70,48 +73,101 @@ export async function scrapWithFireEngine({
`⛏️ Fire-Engine (${engine}): Scraping ${url} | params: { wait: ${waitParam}, screenshot: ${screenshotParam}, fullPageScreenshot: ${fullPageScreenshot}, method: ${fireEngineOptionsParam?.method ?? "null"} }`
);
if (pageOptions?.useFastMode) {
fireEngineOptionsParam.engine = "tlsclient";
engine = "tlsclient";
}
const response = await axios.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
{
url: url,
wait: waitParam,
screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshotParam,
headers: headers,
pageOptions: pageOptions,
priority,
...fireEngineOptionsParam,
},
{
headers: {
"Content-Type": "application/json",
// atsv is only available for beta customers
const betaCustomersString = process.env.BETA_CUSTOMERS;
const betaCustomers = betaCustomersString ? betaCustomersString.split(",") : [];
if (pageOptions?.atsv && betaCustomers.includes(teamId)) {
fireEngineOptionsParam.atsv = true;
} else {
pageOptions.atsv = false;
}
const axiosInstance = axios.create({
headers: { "Content-Type": "application/json" }
});
const startTime = Date.now();
const _response = await Sentry.startSpan({
name: "Call to fire-engine"
}, async span => {
return await axiosInstance.post(
process.env.FIRE_ENGINE_BETA_URL + endpoint,
{
url: url,
wait: waitParam,
screenshot: screenshotParam,
fullPageScreenshot: fullPageScreenshotParam,
headers: headers,
pageOptions: pageOptions,
disableJsDom: pageOptions?.disableJsDom ?? false,
priority,
engine,
instantReturn: true,
...fireEngineOptionsParam,
},
timeout: universalTimeout + waitParam,
}
);
{
headers: {
"Content-Type": "application/json",
...(Sentry.isInitialized() ? ({
"sentry-trace": Sentry.spanToTraceHeader(span),
"baggage": Sentry.spanToBaggageHeader(span),
}) : {}),
}
}
);
});
if (response.status !== 200) {
let checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
while (checkStatusResponse.data.processing && Date.now() - startTime < universalTimeout + waitParam) {
await new Promise(resolve => setTimeout(resolve, 1000)); // wait 1 second
checkStatusResponse = await axiosInstance.get(`${process.env.FIRE_ENGINE_BETA_URL}/scrape/${_response.data.jobId}`);
}
if (checkStatusResponse.data.processing) {
Logger.debug(`⛏️ Fire-Engine (${engine}): deleting request - jobId: ${_response.data.jobId}`);
axiosInstance.delete(
process.env.FIRE_ENGINE_BETA_URL + `/scrape/${_response.data.jobId}`, {
validateStatus: (status) => true
}
).catch((error) => {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to delete request - jobId: ${_response.data.jobId} | error: ${error}`);
});
Logger.debug(`⛏️ Fire-Engine (${engine}): Request timed out for ${url}`);
logParams.error_message = "Request timed out";
return { html: "", screenshot: "", pageStatusCode: null, pageError: "" };
}
if (checkStatusResponse.status !== 200 || checkStatusResponse.data.error) {
Logger.debug(
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`
`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.status}`
);
logParams.error_message = response.data?.pageError;
logParams.response_code = response.data?.pageStatusCode;
logParams.error_message = checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error;
logParams.response_code = checkStatusResponse.data?.pageStatusCode;
if(response.data && response.data?.pageStatusCode !== 200) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${response.status}`);
if(checkStatusResponse.data && checkStatusResponse.data?.pageStatusCode !== 200) {
Logger.debug(`⛏️ Fire-Engine (${engine}): Failed to fetch url: ${url} \t status: ${checkStatusResponse.data?.pageStatusCode}`);
}
const pageStatusCode = checkStatusResponse.data?.pageStatusCode ? checkStatusResponse.data?.pageStatusCode : checkStatusResponse.data?.error && checkStatusResponse.data?.error.includes("Dns resolution error for hostname") ? 404 : undefined;
return {
html: "",
screenshot: "",
pageStatusCode: response.data?.pageStatusCode,
pageError: response.data?.pageError,
pageStatusCode,
pageError: checkStatusResponse.data?.pageError ?? checkStatusResponse.data?.error,
};
}
const contentType = response.headers["content-type"];
const contentType = checkStatusResponse.data.responseHeaders?.["content-type"];
if (contentType && contentType.includes("application/pdf")) {
const { content, pageStatusCode, pageError } = await fetchAndProcessPdf(
url,
@ -122,18 +178,19 @@ export async function scrapWithFireEngine({
logParams.error_message = pageError;
return { html: content, screenshot: "", pageStatusCode, pageError };
} else {
const data = response.data;
const data = checkStatusResponse.data;
logParams.success =
(data.pageStatusCode >= 200 && data.pageStatusCode < 300) ||
data.pageStatusCode === 404;
logParams.html = data.content ?? "";
logParams.response_code = data.pageStatusCode;
logParams.error_message = data.pageError;
logParams.error_message = data.pageError ?? data.error;
return {
html: data.content ?? "",
screenshot: data.screenshot ?? "",
pageStatusCode: data.pageStatusCode,
pageError: data.pageError,
pageError: data.pageError ?? data.error,
};
}
} catch (error) {

View File

@ -43,6 +43,9 @@ export async function scrapWithScrapingBee(
transparent_status_code: "True",
},
});
Logger.info(
`⛏️ ScrapingBee: Scraping ${url}`
);
const contentType = response.headers["content-type"];
if (contentType && contentType.includes("application/pdf")) {
logParams.success = true;

View File

@ -126,6 +126,7 @@ export async function scrapSingleUrl(
extractorOptions?: ExtractorOptions,
existingHtml?: string,
priority?: number,
teamId?: string
): Promise<Document> {
pageOptions = {
includeMarkdown: pageOptions.includeMarkdown ?? true,
@ -179,7 +180,7 @@ export async function scrapSingleUrl(
case "fire-engine;chrome-cdp":
let engine: "playwright" | "chrome-cdp" | "tlsclient" = "playwright";
if(method === "fire-engine;chrome-cdp"){
if (method === "fire-engine;chrome-cdp") {
engine = "chrome-cdp";
}
@ -193,8 +194,10 @@ export async function scrapSingleUrl(
headers: pageOptions.headers,
fireEngineOptions: {
engine: engine,
atsv: pageOptions.atsv,
},
priority,
teamId,
});
scraperResponse.text = response.html;
scraperResponse.screenshot = response.screenshot;

View File

@ -15,6 +15,8 @@ const socialMediaBlocklist = [
'whatsapp.com',
'wechat.com',
'telegram.org',
'researchhub.com',
'youtube.com'
];
const allowedKeywords = [

View File

@ -49,7 +49,7 @@ export async function checkAlerts() {
};
const checkAll = async () => {
// await checkActiveJobs();
await checkActiveJobs();
await checkWaitingQueue();
};

View File

@ -2,16 +2,47 @@ import { Job, Queue } from "bullmq";
import { getScrapeQueue } from "./queue-service";
import { v4 as uuidv4 } from "uuid";
import { WebScraperOptions } from "../types";
import * as Sentry from "@sentry/node";
async function addScrapeJobRaw(
webScraperOptions: any,
options: any,
jobId: string,
): Promise<Job> {
return await getScrapeQueue().add(jobId, webScraperOptions, {
...options,
priority: webScraperOptions.crawl_id ? 20 : 10,
jobId,
});
}
export async function addScrapeJob(
webScraperOptions: WebScraperOptions,
options: any = {},
jobId: string = uuidv4(),
): Promise<Job> {
return await getScrapeQueue().add(jobId, webScraperOptions, {
priority: webScraperOptions.crawl_id ? 20 : 10,
...options,
jobId,
});
if (Sentry.isInitialized()) {
const size = JSON.stringify(webScraperOptions).length;
return await Sentry.startSpan({
name: "Add scrape job",
op: "queue.publish",
attributes: {
"messaging.message.id": jobId,
"messaging.destination.name": getScrapeQueue().name,
"messaging.message.body.size": size,
},
}, async (span) => {
return await addScrapeJobRaw({
...webScraperOptions,
sentry: {
trace: Sentry.spanToTraceHeader(span),
baggage: Sentry.spanToBaggageHeader(span),
size,
},
}, options, jobId);
});
} else {
return await addScrapeJobRaw(webScraperOptions, options, jobId);
}
}

View File

@ -35,6 +35,6 @@ export function getScrapeQueue() {
}
import { QueueEvents } from 'bullmq';
export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection });
// === REMOVED IN FAVOR OF POLLING -- NOT RELIABLE
// import { QueueEvents } from 'bullmq';
// export const scrapeQueueEvents = new QueueEvents(scrapeQueueName, { connection: redisConnection.duplicate() });

View File

@ -1,4 +1,6 @@
import "dotenv/config";
import "./sentry"
import * as Sentry from "@sentry/node";
import { CustomError } from "../lib/custom-error";
import {
getScrapeQueue,
@ -48,6 +50,7 @@ const processJobInternal = async (token: string, job: Job) => {
await job.extendLock(token, jobLockExtensionTime);
}, jobLockExtendInterval);
let err = null;
try {
const result = await processJob(job, token);
try{
@ -60,11 +63,14 @@ const processJobInternal = async (token: string, job: Job) => {
}
} catch (error) {
console.log("Job failed, error:", error);
Sentry.captureException(error);
err = error;
await job.moveToFailed(error, token, false);
} finally {
clearInterval(extendLockInterval);
}
return err;
};
let isShuttingDown = false;
@ -74,7 +80,7 @@ process.on("SIGINT", () => {
isShuttingDown = true;
});
const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<void>) => {
const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<any>) => {
const worker = new Worker(queueName, null, {
connection: redisConnection,
lockDuration: 1 * 60 * 1000, // 1 minute
@ -102,7 +108,47 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
const job = await worker.getNextJob(token);
if (job) {
processJobInternal(token, job);
if (job.data && job.data.sentry && Sentry.isInitialized()) {
Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
Sentry.startSpan({
name: "Scrape job",
attributes: {
job: job.id,
worker: process.env.FLY_MACHINE_ID ?? worker.id,
},
}, async (span) => {
await Sentry.startSpan({
name: "Process scrape job",
op: "queue.process",
attributes: {
"messaging.message.id": job.id,
"messaging.destination.name": getScrapeQueue().name,
"messaging.message.body.size": job.data.sentry.size,
"messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
"messaging.message.retry.count": job.attemptsMade,
}
}, async () => {
const res = await processJobInternal(token, job);
if (res !== null) {
span.setStatus({ code: 2 }); // ERROR
} else {
span.setStatus({ code: 1 }); // OK
}
});
});
});
} else {
Sentry.startSpan({
name: "Scrape job",
attributes: {
job: job.id,
worker: process.env.FLY_MACHINE_ID ?? worker.id,
},
}, () => {
processJobInternal(token, job);
});
}
await sleep(gotJobInterval);
} else {
await sleep(connectionMonitorInterval);
@ -115,6 +161,20 @@ workerFun(scrapeQueueName, processJobInternal);
async function processJob(job: Job, token: string) {
Logger.info(`🐂 Worker taking job ${job.id}`);
// Check if the job URL is researchhub and block it immediately
// TODO: remove this once solve the root issue
if (job.data.url && (job.data.url.includes("researchhub.com") || job.data.url.includes("ebay.com") || job.data.url.includes("youtube.com") || job.data.url.includes("microsoft.com") )) {
Logger.info(`🐂 Blocking job ${job.id} with URL ${job.data.url}`);
const data = {
success: false,
docs: [],
project_id: job.data.project_id,
error: "URL is blocked. Suspecious activity detected. Please contact hello@firecrawl.com if you believe this is an error.",
};
await job.moveToCompleted(data.docs, token, false);
return data;
}
try {
job.updateProgress({
current: 1,
@ -123,6 +183,7 @@ async function processJob(job: Job, token: string) {
current_url: "",
});
const start = Date.now();
const { success, message, docs } = await startWebScraperPipeline({
job,
token,
@ -276,6 +337,12 @@ async function processJob(job: Job, token: string) {
} catch (error) {
Logger.error(`🐂 Job errored ${job.id} - ${error}`);
Sentry.captureException(error, {
data: {
job: job.id
},
})
if (error instanceof CustomError) {
// Here we handle the error, then save the failed job
Logger.error(error.message); // or any other error handling

View File

@ -103,7 +103,7 @@ export function getRateLimiter(
plan?: string
) {
if (token.includes("a01ccae") || token.includes("6254cf9")) {
if (token.includes("a01ccae") || token.includes("6254cf9") || token.includes("0f96e673")) {
return testSuiteRateLimiter;
}

View File

@ -0,0 +1,18 @@
// Import with `import * as Sentry from "@sentry/node"` if you are using ESM
import * as Sentry from "@sentry/node";
import { nodeProfilingIntegration } from "@sentry/profiling-node";
import { Logger } from "../lib/logger";

// Module-level side effect: importing this file initializes Sentry once.
// Initialization is skipped entirely when SENTRY_DSN is unset, so local/dev
// runs without the env var are unaffected.
if (process.env.SENTRY_DSN) {
  Logger.info("Setting up Sentry...");
  Sentry.init({
    dsn: process.env.SENTRY_DSN,
    integrations: [
      nodeProfilingIntegration(),
    ],
    // Sample every trace in dev; keep production tracing at 4.5%.
    tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045,
    // Profile all sampled transactions.
    profilesSampleRate: 1.0,
    // Tag events with the Fly machine id that produced them.
    serverName: process.env.FLY_MACHINE_ID,
    environment: process.env.SENTRY_ENVIRONMENT ?? "production",
  });
}

View File

@ -2,12 +2,22 @@
"compilerOptions": {
"rootDir": "./src",
"lib": ["es6","DOM"],
"target": "ES2020", // or higher
// or higher
"target": "ES2020",
"module": "commonjs",
"esModuleInterop": true,
"sourceMap": true,
"outDir": "./dist/src",
"moduleResolution": "node",
"baseUrl": ".",
"paths": {
"*": ["node_modules/*", "src/types/*"],
},
"inlineSources": true
},
"include": ["src/","src/**/*", "services/db/supabase.ts", "utils/utils.ts", "services/db/supabaseEmbeddings.ts", "utils/EventEmmitter.ts", "src/services/queue-service.ts"]
}

2
apps/redis/.dockerignore Normal file
View File

@ -0,0 +1,2 @@
.git
fly.toml

6
apps/redis/Dockerfile Normal file
View File

@ -0,0 +1,6 @@
# Pin the Redis version; override at build time with --build-arg REDIS_VERSION=...
ARG REDIS_VERSION=7.2.5
FROM bitnami/redis:${REDIS_VERSION}

# Entrypoint script that tunes the kernel and launches redis-server.
# NOTE(review): assumes the script has its executable bit set in the repo — confirm.
COPY start-redis-server.sh /usr/bin/start-redis-server.sh

CMD ["/usr/bin/start-redis-server.sh"]

2
apps/redis/Procfile Normal file
View File

@ -0,0 +1,2 @@
redis: /usr/bin/start-redis-server.sh
metrics: /usr/local/bin/redis_exporter -redis.addr localhost:6379 -web.listen-address ":9091"

48
apps/redis/README.md Normal file
View File

@ -0,0 +1,48 @@
The official repository for running Redis on Fly.io. Find the accompanying Docker image at [flyio/redis](https://hub.docker.com/repository/docker/flyio/redis).
## Usage
This installation requires setting a password on Redis. To do that, run `fly secrets set REDIS_PASSWORD=mypassword` before deploying. Keep
track of this password - it won't be visible again after deployment!
If you need no customizations, you can deploy using the official Docker image. See `fly.toml` in this repository for an example to get started with.
## Runtime requirements
By default, this Redis installation will only accept connections on the private IPv6 network, on the standard port 6379.
If you want to access it from the public internet, add a `[[services]]` section to your `fly.toml`. An example is included in this repo for accessing Redis on port 10000.
We recommend adding persistent storage for Redis data. If you skip this step, data will be lost across deploys or restarts. For Fly apps, the volume needs to be in the same region as the app instances. For example:
```cmd
flyctl volumes create redis_server --region ord
```
```out
Name: redis_server
Region: ord
Size GB: 10
Created at: 02 Nov 20 19:55 UTC
```
To connect this volume to the app, `fly.toml` includes a `[mounts]` entry.
```
[mounts]
source = "redis_server"
destination = "/data"
```
When the app starts, that volume will be mounted on /data.
## Cutting a release
If you have write access to this repo, you can ship a prerelease or full release with:
```
scripts/bump_version.sh
```
or
```
scripts/bump_version.sh prerel
```

22
apps/redis/fly.toml Normal file
View File

@ -0,0 +1,22 @@
# Fly.io app configuration for the Redis instance.
app = 'firecrawl-dragonfly'
primary_region = 'iad'

# Persistent volume so Redis data survives deploys and restarts.
[[mounts]]
source = 'firecrawl_redis'
destination = '/data'

# Expose Redis on its standard port over TCP.
[[services]]
protocol = 'tcp'
internal_port = 6379

[[services.tcp_checks]]
interval = '10s'
timeout = '2s'

[[vm]]
size = 'performance-4x'
memory = '32gb'

# Prometheus metrics endpoint (redis_exporter listens on :9091 per the Procfile).
[[metrics]]
port = 9091
path = '/metrics'

View File

@ -0,0 +1,91 @@
#!/usr/bin/env bash
set -euo pipefail

# Tag-based release helper: computes the next semver tag with the bundled
# scripts/semver tool, then tags HEAD and pushes the tag.
# Usage: bump_version.sh [major|minor|patch|prerel] — "prerel" is shorthand
# for a patch-level prerelease bump.
ORIGIN=${ORIGIN:-origin}

bump=${1:-patch}
prerel=${2:-none}

# "bump_version.sh prerel" == "bump_version.sh patch prerel"
if [[ $bump == "prerel" ]]; then
  bump="patch"
  prerel="prerel"
fi

# Refuse to release from a dirty tree or with unpushed/unpulled commits.
if [[ $(git status --porcelain) != "" ]]; then
  echo "Error: repo is dirty. Run git status, clean repo and try again."
  exit 1
elif [[ $(git status --porcelain -b | grep -e "ahead" -e "behind") != "" ]]; then
  echo "Error: repo has unpushed commits. Push commits to remote and try again."
  exit 1
fi

BRANCH="$(git rev-parse --abbrev-ref HEAD)"
# Prereleases should come from the 'prerelease' branch — warn and confirm
# instead of hard-failing.
if [[ "$prerel" == "prerel" && "$BRANCH" != "prerelease" ]]; then
  # echo "❌ Sorry, you can only cut a pre-release from the 'prelease' branch"
  # echo "Run 'git checkout prerelease && git pull origin prerelease' and try again."
  # exit 1
  echo "⚠️ Pre-releases should be cut from the 'prerelease' branch"
  echo "Please make sure you're not overwriting someone else's prerelease!"
  echo
  read -p "Release anyway? " -n 1 -r
  echo
  if [[ $REPLY =~ ^[^Yy]$ ]]; then
    echo Aborting.
    exit 1
  fi
fi

# Full releases must be cut from main.
if [[ "$prerel" != "prerel" && "$BRANCH" != "main" ]]; then
  echo "❌ Sorry, you can only cut a release from the 'main' branch"
  echo "Run 'git checkout main && git pull origin main' and try again."
  exit 1
fi

git fetch
# Warn (with confirmation) when upstream has commits this release would miss.
if [[ "$(git rev-parse HEAD 2>&1)" != "$(git rev-parse '@{u}' 2>&1)" ]]; then
  echo "There are upstream commits that won't be included in this release."
  echo "You probably want to exit, run 'git pull', then release."
  echo
  read -p "Release anyway? " -n 1 -r
  echo
  if [[ $REPLY =~ ^[^Yy]$ ]]; then
    echo Aborting.
    exit 1
  fi
fi

dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
previous_version="$("$dir"/../scripts/version.sh -s)"

# Compute the next version.
if [[ $prerel == "prerel" ]]; then
  prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version")
  if [[ $prerelversion == "" ]]; then
    # First prerelease after a release: bump the requested part, then pre-1.
    new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version")
    new_version=$("$dir"/../scripts/semver bump prerel pre-1 "$new_version")
  else
    # Already a prerelease: increment its numeric suffix (pre-N -> pre-N+1).
    prerel=pre-$((${prerelversion#pre-} + 1))
    new_version=$("$dir"/../scripts/semver bump prerel "$prerel" "$previous_version")
  fi
else
  prerelversion=$("$dir"/../scripts/semver get prerel "$previous_version")
  if [[ $prerelversion == "" ]]; then
    new_version=$("$dir"/../scripts/semver bump "$bump" "$previous_version")
  else
    # Promoting a prerelease to a release: drop the prerelease suffix.
    new_version=${previous_version//-$prerelversion/}
  fi
fi

new_version="v$new_version"

echo "Bumping version from v${previous_version} to ${new_version}"
read -p "Are you sure? " -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
  git tag -m "release ${new_version}" -a "$new_version" && git push "${ORIGIN}" tag "$new_version"
  echo "done"
fi

200
apps/redis/scripts/semver Executable file
View File

@ -0,0 +1,200 @@
#!/usr/bin/env bash
set -o errexit -o nounset -o pipefail

# Semver pattern with an optional leading v/V. Capture groups used below:
# 1=major, 2=minor, 3=patch, 4=prerelease (incl. leading '-'), 6=build (incl. leading '+').
SEMVER_REGEX="^[vV]?(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)\\.(0|[1-9][0-9]*)(\\-[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?(\\+[0-9A-Za-z-]+(\\.[0-9A-Za-z-]+)*)?$"

PROG=semver
PROG_VERSION=2.1.0

# Help text printed by --help and on usage errors.
USAGE="\
Usage:
$PROG bump (major|minor|patch|release|prerel <prerel>|build <build>) <version>
$PROG compare <version> <other_version>
$PROG get (major|minor|patch|release|prerel|build) <version>
$PROG --help
$PROG --version
Arguments:
<version> A version must match the following regex pattern:
\"${SEMVER_REGEX}\".
In english, the version must match X.Y.Z(-PRERELEASE)(+BUILD)
where X, Y and Z are positive integers, PRERELEASE is an optional
string composed of alphanumeric characters and hyphens and
BUILD is also an optional string composed of alphanumeric
characters and hyphens.
<other_version> See <version> definition.
<prerel> String that must be composed of alphanumeric characters and hyphens.
<build> String that must be composed of alphanumeric characters and hyphens.
Options:
-v, --version Print the version of this tool.
-h, --help Print this help message.
Commands:
bump Bump <version> by one of major, minor, patch, prerel, build
or a forced potentially conflicting version. The bumped version is
shown to stdout.
compare Compare <version> with <other_version>, output to stdout the
following values: -1 if <other_version> is newer, 0 if equal, 1 if
older.
get Extract given part of <version>, where part is one of major, minor,
patch, prerel, build."
# Print a message on stderr (escape sequences interpreted) and abort.
error() {
  echo -e "$1" >&2
  exit 1
}
# Abort, printing the usage text on stderr.
usage-help() {
  error "$USAGE"
}
# Print "semver: <tool version>" and exit successfully.
usage-version() {
  echo -e "${PROG}: $PROG_VERSION"
  exit 0
}
# Validate $1 against SEMVER_REGEX.
# With one argument: echo the (possibly v-prefixed) version back unchanged.
# With two arguments: populate the array variable named by $2 with
# (major minor patch prerel build) via eval; prerel keeps its leading '-'
# and build its leading '+', exactly as captured by the regex.
# On mismatch: abort via error.
function validate-version {
  local version=$1
  if [[ "$version" =~ $SEMVER_REGEX ]]; then
    # if a second argument is passed, store the result in var named by $2
    if [ "$#" -eq "2" ]; then
      local major=${BASH_REMATCH[1]}
      local minor=${BASH_REMATCH[2]}
      local patch=${BASH_REMATCH[3]}
      local prere=${BASH_REMATCH[4]}
      # group 6 (not 5): group 5 is the nested dot-separated prerelease part
      local build=${BASH_REMATCH[6]}
      eval "$2=(\"$major\" \"$minor\" \"$patch\" \"$prere\" \"$build\")"
    else
      echo "$version"
    fi
  else
    error "version $version does not match the semver scheme 'X.Y.Z(-PRERELEASE)(+BUILD)'. See help for more information."
  fi
}
# Compare two semver strings. Prints -1 if $2 is newer, 0 if equal,
# 1 if $1 is newer. A version without a prerelease suffix sorts after the
# same version with one.
function compare-version {
  # Parse both into arrays V and V_ (major minor patch prerel build).
  validate-version "$1" V
  validate-version "$2" V_

  # MAJOR, MINOR and PATCH should compare numerically
  for i in 0 1 2; do
    local diff=$((${V[$i]} - ${V_[$i]}))
    if [[ $diff -lt 0 ]]; then
      echo -1; return 0
    elif [[ $diff -gt 0 ]]; then
      echo 1; return 0
    fi
  done

  # PREREL should compare with the ASCII order.
  # NOTE(review): plain lexicographic comparison — not full semver precedence,
  # where numeric prerelease identifiers compare numerically; confirm acceptable.
  if [[ -z "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then
    echo 1; return 0;
  elif [[ -n "${V[3]}" ]] && [[ -z "${V_[3]}" ]]; then
    echo -1; return 0;
  elif [[ -n "${V[3]}" ]] && [[ -n "${V_[3]}" ]]; then
    if [[ "${V[3]}" > "${V_[3]}" ]]; then
      echo 1; return 0;
    elif [[ "${V[3]}" < "${V_[3]}" ]]; then
      echo -1; return 0;
    fi
  fi

  # Build metadata (V[4]) is ignored, per semver.
  echo 0
}
# Bump <version> and print the result. Two call shapes:
#   bump (major|minor|patch|release) <version>
#   bump (prerel|build) <value> <version>
# Bumping major/minor/patch resets the lower parts and drops any
# prerelease/build suffix; 'release' just strips the suffixes.
function command-bump {
  local new; local version; local sub_version; local command;

  # Argument-count-driven parse of the two call shapes above.
  case $# in
    2) case $1 in
        major|minor|patch|release) command=$1; version=$2;;
        *) usage-help;;
      esac ;;
    3) case $1 in
        prerel|build) command=$1; sub_version=$2 version=$3 ;;
        *) usage-help;;
      esac ;;
    *) usage-help;;
  esac

  # Fills the 'parts' array: (major minor patch prerel build).
  validate-version "$version" parts
  # shellcheck disable=SC2154
  local major="${parts[0]}"
  local minor="${parts[1]}"
  local patch="${parts[2]}"
  local prere="${parts[3]}"
  local build="${parts[4]}"

  case "$command" in
    major) new="$((major + 1)).0.0";;
    minor) new="${major}.$((minor + 1)).0";;
    patch) new="${major}.${minor}.$((patch + 1))";;
    release) new="${major}.${minor}.${patch}";;
    # prerel/build re-validate the rebuilt string so a bad <value> aborts.
    prerel) new=$(validate-version "${major}.${minor}.${patch}-${sub_version}");;
    build) new=$(validate-version "${major}.${minor}.${patch}${prere}+${sub_version}");;
    *) usage-help ;;
  esac

  echo "$new"
  exit 0
}
# CLI entry for "compare": validate both arguments, then delegate to
# compare-version, which prints -1/0/1.
command-compare() {
  local lhs rhs

  if [[ $# -ne 2 ]]; then
    usage-help
  fi

  lhs=$(validate-version "$1")
  rhs=$(validate-version "$2")

  compare-version "$lhs" "$rhs"
  exit 0
}
# shellcheck disable=SC2034
# CLI entry for "get": print one component of <version>.
# prerel and build are printed without their leading '-' / '+'.
function command-get {
  local part version

  if [[ "$#" -ne "2" ]] || [[ -z "$1" ]] || [[ -z "$2" ]]; then
    usage-help
    exit 0
  fi

  part="$1"
  version="$2"

  # Fills the 'parts' array: (major minor patch prerel build).
  validate-version "$version" parts
  local major="${parts[0]}"
  local minor="${parts[1]}"
  local patch="${parts[2]}"
  # :1 strips the leading '-' (prerel) / '+' (build) captured by the regex
  local prerel="${parts[3]:1}"
  local build="${parts[4]:1}"

  case "$part" in
    # ${!part}: indirect expansion of the local variable named by $part.
    # NOTE(review): 'release' is accepted but has no matching local, so it
    # prints an empty string — confirm that is intended.
    major|minor|patch|release|prerel|build) echo "${!part}" ;;
    *) usage-help ;;
  esac

  exit 0
}
# No arguments at all: print an error plus usage and abort.
case $# in
  0) echo "Unknown command: $*"; usage-help;;
esac

# Dispatch on the subcommand; each command-* parses its own remaining args.
case $1 in
  --help|-h) echo -e "$USAGE"; exit 0;;
  --version|-v) usage-version ;;
  bump) shift; command-bump "$@";;
  get) shift; command-get "$@";;
  compare) shift; command-compare "$@";;
  *) echo "Unknown arguments: $*"; usage-help;;
esac

5
apps/redis/scripts/version.sh Executable file
View File

@ -0,0 +1,5 @@
ORIGIN=${ORIGIN:-origin}
# Fetch tags BEFORE listing them. The previous version piped `git fetch` into
# the `git tag` pipeline; a pipe runs both sides concurrently (and the fetch's
# output was redirected away, so the pipe carried nothing), meaning the tag
# listing could race the fetch and miss freshly fetched tags.
git fetch --tags "${ORIGIN}" &>/dev/null
# Newest tag by version sort (prerelease "-pre" suffixes ordered before the
# release), excluding dev tags and the bare v1/v2 tags; strip the leading "v".
version=$(git -c "versionsort.prereleasesuffix=-pre" tag -l --sort=version:refname | grep -v dev | grep -vE '^v2$' | grep -vE '^v1$' | tail -n1 | cut -c 2-)
echo "$version"

View File

@ -0,0 +1,30 @@
#!/bin/bash
set -e

# Kernel tuning recommended for Redis; may be denied in unprivileged
# containers, which is tolerated (|| true).
sysctl vm.overcommit_memory=1 || true
sysctl net.core.somaxconn=1024 || true

# Optional auth: only pass --requirepass when REDIS_PASSWORD is set.
PW_ARG=""
if [[ -n "${REDIS_PASSWORD}" ]]; then
  PW_ARG="--requirepass $REDIS_PASSWORD"
fi

# Set maxmemory-policy to 'allkeys-lru' for caching servers that should always evict old keys
: ${MAXMEMORY_POLICY:="volatile-lru"}
: ${APPENDONLY:="no"}
: ${FLY_VM_MEMORY_MB:=512}
# RDB snapshot schedule; set NOSAVE to any value to disable snapshotting.
if [ "${NOSAVE}" = "" ] ; then
  : ${SAVE:="3600 1 300 100 60 10000"}
fi

# Cap Redis at 80% of the VM's memory (the old comment said 10%, but the
# arithmetic has always computed 80%).
MAXMEMORY=$(($FLY_VM_MEMORY_MB*80/100))

# -p: the mounted volume persists across restarts, so /data/redis may already
# exist; a plain mkdir would fail and kill the script under `set -e`.
mkdir -p /data/redis

redis-server $PW_ARG \
  --dir /data/redis \
  --maxmemory "${MAXMEMORY}mb" \
  --maxmemory-policy $MAXMEMORY_POLICY \
  --appendonly $APPENDONLY \
  --save "$SAVE"