mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge branch 'v1-webscraper' of https://github.com/mendableai/firecrawl into v1-webscraper
This commit is contained in:
commit
0c05d096a9
|
@ -57,6 +57,8 @@
|
|||
"@nangohq/node": "^0.40.8",
|
||||
"@sentry/node": "^8.13.0",
|
||||
"@supabase/supabase-js": "^2.44.2",
|
||||
"@types/express-ws": "^3.0.4",
|
||||
"@types/ws": "^8.5.12",
|
||||
"ajv": "^8.16.0",
|
||||
"async": "^3.2.5",
|
||||
"async-mutex": "^0.5.0",
|
||||
|
@ -71,6 +73,7 @@
|
|||
"date-fns": "^3.6.0",
|
||||
"dotenv": "^16.3.1",
|
||||
"express-rate-limit": "^7.3.1",
|
||||
"express-ws": "^5.0.2",
|
||||
"form-data": "^4.0.0",
|
||||
"glob": "^10.4.2",
|
||||
"gpt3-tokenizer": "^1.1.5",
|
||||
|
@ -105,6 +108,7 @@
|
|||
"unstructured-client": "^0.11.3",
|
||||
"uuid": "^10.0.0",
|
||||
"wordpos": "^2.1.0",
|
||||
"ws": "^8.18.0",
|
||||
"xml2js": "^0.6.2",
|
||||
"zod": "^3.23.8",
|
||||
"zod-to-json-schema": "^3.23.1"
|
||||
|
|
|
@ -41,6 +41,12 @@ importers:
|
|||
'@supabase/supabase-js':
|
||||
specifier: ^2.44.2
|
||||
version: 2.44.2
|
||||
'@types/express-ws':
|
||||
specifier: ^3.0.4
|
||||
version: 3.0.4
|
||||
'@types/ws':
|
||||
specifier: ^8.5.12
|
||||
version: 8.5.12
|
||||
ajv:
|
||||
specifier: ^8.16.0
|
||||
version: 8.16.0
|
||||
|
@ -83,6 +89,9 @@ importers:
|
|||
express-rate-limit:
|
||||
specifier: ^7.3.1
|
||||
version: 7.3.1(express@4.19.2)
|
||||
express-ws:
|
||||
specifier: ^5.0.2
|
||||
version: 5.0.2(express@4.19.2)
|
||||
form-data:
|
||||
specifier: ^4.0.0
|
||||
version: 4.0.0
|
||||
|
@ -106,7 +115,7 @@ importers:
|
|||
version: 0.0.28
|
||||
langchain:
|
||||
specifier: ^0.2.8
|
||||
version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1)
|
||||
version: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
languagedetect:
|
||||
specifier: ^2.0.0
|
||||
version: 2.0.0
|
||||
|
@ -185,6 +194,9 @@ importers:
|
|||
wordpos:
|
||||
specifier: ^2.1.0
|
||||
version: 2.1.0
|
||||
ws:
|
||||
specifier: ^8.18.0
|
||||
version: 8.18.0
|
||||
xml2js:
|
||||
specifier: ^0.6.2
|
||||
version: 0.6.2
|
||||
|
@ -1559,6 +1571,9 @@ packages:
|
|||
'@types/express-serve-static-core@4.19.3':
|
||||
resolution: {integrity: sha512-KOzM7MhcBFlmnlr/fzISFF5vGWVSvN6fTd4T+ExOt08bA/dA5kpSzY52nMsI1KDFmUREpJelPYyuslLRSjjgCg==}
|
||||
|
||||
'@types/express-ws@3.0.4':
|
||||
resolution: {integrity: sha512-Yjj18CaivG5KndgcvzttWe8mPFinPCHJC2wvyQqVzA7hqeufM8EtWMj6mpp5omg3s8XALUexhOu8aXAyi/DyJQ==}
|
||||
|
||||
'@types/express@4.17.21':
|
||||
resolution: {integrity: sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ==}
|
||||
|
||||
|
@ -1661,8 +1676,8 @@ packages:
|
|||
'@types/whatwg-url@11.0.5':
|
||||
resolution: {integrity: sha512-coYR071JRaHa+xoEvvYqvnIHaVqaYrLPbsufM9BF63HkwI5Lgmy2QR8Q5K/lYDYo5AK82wOvSOS0UsLTpTG7uQ==}
|
||||
|
||||
'@types/ws@8.5.10':
|
||||
resolution: {integrity: sha512-vmQSUcfalpIq0R9q7uTo2lXs6eGIpt9wtnLdMv9LVpIjCA/+ufZRozlVoVelIYixx1ugCBKDhn89vnsEGOCx9A==}
|
||||
'@types/ws@8.5.12':
|
||||
resolution: {integrity: sha512-3tPRkv1EtkDpzlgyKyI8pGsGZAGPEaXeu0DOj5DI25Ja91bdAYddYHbADRYVrZMRbfW+1l5YwXVDKohDJNQxkQ==}
|
||||
|
||||
'@types/yargs-parser@21.0.3':
|
||||
resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==}
|
||||
|
@ -2416,6 +2431,12 @@ packages:
|
|||
peerDependencies:
|
||||
express: 4 || 5 || ^5.0.0-beta.1
|
||||
|
||||
express-ws@5.0.2:
|
||||
resolution: {integrity: sha512-0uvmuk61O9HXgLhGl3QhNSEtRsQevtmbL94/eILaliEADZBHZOQUAiHFrGPrgsjikohyrmSG5g+sCfASTt0lkQ==}
|
||||
engines: {node: '>=4.5.0'}
|
||||
peerDependencies:
|
||||
express: ^4.0.0 || ^5.0.0-alpha.1
|
||||
|
||||
express@4.19.2:
|
||||
resolution: {integrity: sha512-5T6nhjsT+EOMzuck8JjBHARTHfMht0POzlA60WV2pMD3gyXw2LZnZ+ueGdNxG+0calOJcWKbpFcuzLZ91YWq9Q==}
|
||||
engines: {node: '>= 0.10.0'}
|
||||
|
@ -4547,8 +4568,20 @@ packages:
|
|||
resolution: {integrity: sha512-+QU2zd6OTD8XWIJCbffaiQeH9U73qIqafo1x6V1snCWYGJf6cVE0cDR4D8xRzcEnfI21IFrUPzPGtcPf8AC+Rw==}
|
||||
engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
|
||||
|
||||
ws@8.17.1:
|
||||
resolution: {integrity: sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==}
|
||||
ws@7.5.10:
|
||||
resolution: {integrity: sha512-+dbF1tHwZpXcbOJdVOkzLDxZP1ailvSxM6ZweXTegylPny803bFhA+vqBYw4s31NSAk4S2Qz+AKXK9a4wkdjcQ==}
|
||||
engines: {node: '>=8.3.0'}
|
||||
peerDependencies:
|
||||
bufferutil: ^4.0.1
|
||||
utf-8-validate: ^5.0.2
|
||||
peerDependenciesMeta:
|
||||
bufferutil:
|
||||
optional: true
|
||||
utf-8-validate:
|
||||
optional: true
|
||||
|
||||
ws@8.18.0:
|
||||
resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==}
|
||||
engines: {node: '>=10.0.0'}
|
||||
peerDependencies:
|
||||
bufferutil: ^4.0.1
|
||||
|
@ -5185,13 +5218,13 @@ snapshots:
|
|||
|
||||
'@js-sdsl/ordered-map@4.4.2': {}
|
||||
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)':
|
||||
'@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
|
||||
dependencies:
|
||||
ansi-styles: 5.2.0
|
||||
camelcase: 6.3.0
|
||||
decamelize: 1.2.0
|
||||
js-tiktoken: 1.0.12
|
||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
ml-distance: 4.0.1
|
||||
mustache: 4.2.0
|
||||
p-queue: 6.6.2
|
||||
|
@ -5203,9 +5236,9 @@ snapshots:
|
|||
- langchain
|
||||
- openai
|
||||
|
||||
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))':
|
||||
'@langchain/openai@0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
js-tiktoken: 1.0.12
|
||||
openai: 4.52.2
|
||||
zod: 3.23.8
|
||||
|
@ -5214,9 +5247,9 @@ snapshots:
|
|||
- encoding
|
||||
- langchain
|
||||
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)':
|
||||
'@langchain/textsplitters@0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)':
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
js-tiktoken: 1.0.12
|
||||
transitivePeerDependencies:
|
||||
- langchain
|
||||
|
@ -6374,8 +6407,8 @@ snapshots:
|
|||
dependencies:
|
||||
'@supabase/node-fetch': 2.6.15
|
||||
'@types/phoenix': 1.6.5
|
||||
'@types/ws': 8.5.10
|
||||
ws: 8.17.1
|
||||
'@types/ws': 8.5.12
|
||||
ws: 8.18.0
|
||||
transitivePeerDependencies:
|
||||
- bufferutil
|
||||
- utf-8-validate
|
||||
|
@ -6472,6 +6505,12 @@ snapshots:
|
|||
'@types/range-parser': 1.2.7
|
||||
'@types/send': 0.17.4
|
||||
|
||||
'@types/express-ws@3.0.4':
|
||||
dependencies:
|
||||
'@types/express': 4.17.21
|
||||
'@types/express-serve-static-core': 4.19.3
|
||||
'@types/ws': 8.5.12
|
||||
|
||||
'@types/express@4.17.21':
|
||||
dependencies:
|
||||
'@types/body-parser': 1.19.5
|
||||
|
@ -6595,7 +6634,7 @@ snapshots:
|
|||
dependencies:
|
||||
'@types/webidl-conversions': 7.0.3
|
||||
|
||||
'@types/ws@8.5.10':
|
||||
'@types/ws@8.5.12':
|
||||
dependencies:
|
||||
'@types/node': 20.14.1
|
||||
|
||||
|
@ -7336,6 +7375,14 @@ snapshots:
|
|||
dependencies:
|
||||
express: 4.19.2
|
||||
|
||||
express-ws@5.0.2(express@4.19.2):
|
||||
dependencies:
|
||||
express: 4.19.2
|
||||
ws: 7.5.10
|
||||
transitivePeerDependencies:
|
||||
- bufferutil
|
||||
- utf-8-validate
|
||||
|
||||
express@4.19.2:
|
||||
dependencies:
|
||||
accepts: 1.3.8
|
||||
|
@ -8248,17 +8295,17 @@ snapshots:
|
|||
|
||||
kleur@3.0.3: {}
|
||||
|
||||
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1):
|
||||
langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0):
|
||||
dependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
'@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
'@langchain/openai': 0.2.1(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))
|
||||
'@langchain/textsplitters': 0.0.3(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
binary-extensions: 2.3.0
|
||||
js-tiktoken: 1.0.12
|
||||
js-yaml: 4.1.0
|
||||
jsonpointer: 5.0.1
|
||||
langchainhub: 0.0.11
|
||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
langsmith: 0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
ml-distance: 4.0.1
|
||||
openapi-types: 12.1.3
|
||||
p-retry: 4.6.2
|
||||
|
@ -8278,14 +8325,14 @@ snapshots:
|
|||
pdf-parse: 1.1.1
|
||||
puppeteer: 22.12.1(typescript@5.4.5)
|
||||
redis: 4.6.14
|
||||
ws: 8.17.1
|
||||
ws: 8.18.0
|
||||
transitivePeerDependencies:
|
||||
- encoding
|
||||
- openai
|
||||
|
||||
langchainhub@0.0.11: {}
|
||||
|
||||
langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2):
|
||||
langsmith@0.1.34(@langchain/core@0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2))(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2):
|
||||
dependencies:
|
||||
'@types/uuid': 9.0.8
|
||||
commander: 10.0.1
|
||||
|
@ -8294,8 +8341,8 @@ snapshots:
|
|||
p-retry: 4.6.2
|
||||
uuid: 9.0.1
|
||||
optionalDependencies:
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1))(openai@4.52.2)
|
||||
langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.17.1)
|
||||
'@langchain/core': 0.2.12(langchain@0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0))(openai@4.52.2)
|
||||
langchain: 0.2.8(@supabase/supabase-js@2.44.2)(axios@1.7.2)(cheerio@1.0.0-rc.12)(handlebars@4.7.8)(html-to-text@9.0.5)(ioredis@5.4.1)(mammoth@1.7.2)(mongodb@6.6.2(socks@2.8.3))(openai@4.52.2)(pdf-parse@1.1.1)(puppeteer@22.12.1(typescript@5.4.5))(redis@4.6.14)(ws@8.18.0)
|
||||
openai: 4.52.2
|
||||
|
||||
languagedetect@2.0.0: {}
|
||||
|
@ -8999,7 +9046,7 @@ snapshots:
|
|||
chromium-bidi: 0.5.24(devtools-protocol@0.0.1299070)
|
||||
debug: 4.3.5
|
||||
devtools-protocol: 0.0.1299070
|
||||
ws: 8.17.1
|
||||
ws: 8.18.0
|
||||
transitivePeerDependencies:
|
||||
- bufferutil
|
||||
- supports-color
|
||||
|
@ -9681,7 +9728,9 @@ snapshots:
|
|||
imurmurhash: 0.1.4
|
||||
signal-exit: 4.1.0
|
||||
|
||||
ws@8.17.1: {}
|
||||
ws@7.5.10: {}
|
||||
|
||||
ws@8.18.0: {}
|
||||
|
||||
xml2js@0.6.2:
|
||||
dependencies:
|
||||
|
|
148
apps/api/src/controllers/v1/crawl-status-ws.ts
Normal file
148
apps/api/src/controllers/v1/crawl-status-ws.ts
Normal file
|
@ -0,0 +1,148 @@
|
|||
import { authMiddleware } from "../../routes/v1";
|
||||
import { RateLimiterMode } from "../../types";
|
||||
import { authenticateUser } from "../v0/auth";
|
||||
import { CrawlStatusParams, CrawlStatusResponse, Document, ErrorResponse, legacyDocumentConverter, RequestWithAuth } from "./types";
|
||||
import { WebSocket } from "ws";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import { Logger } from "../../lib/logger";
|
||||
import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobsOrderedLength, isCrawlFinished, isCrawlFinishedLocked } from "../../lib/crawl-redis";
|
||||
import { getScrapeQueue, scrapeQueueEvents } from "../../services/queue-service";
|
||||
import { getJob, getJobs } from "./crawl-status";
|
||||
|
||||
/** Error payload; in this file it is only ever passed to `close` as the close reason. */
interface ErrorMessage {
  type: "error";
  error: string;
}

/** Snapshot of the crawl status and the documents already done, sent once per connection. */
interface CatchupMessage {
  type: "catchup";
  data: CrawlStatusResponse;
}

/** A single document produced by a scrape job that completed while the socket was open. */
interface DocumentMessage {
  type: "document";
  data: Document;
}

/** Terminal marker passed to `close` when the crawl is over. */
interface DoneMessage {
  type: "done";
}

/** Every payload this endpoint serializes, discriminated by `type`. */
type Message = ErrorMessage | CatchupMessage | DoneMessage | DocumentMessage;
|
||||
|
||||
/**
 * Serializes `msg` as JSON and writes it to `ws` as a single frame.
 *
 * Nothing is written unless the socket is OPEN (readyState 1). A promise is
 * returned in every case so callers can uniformly `await` it or attach a
 * `.catch()`.
 *
 * @param ws - Socket to write to.
 * @param msg - Message to serialize and send.
 * @returns A promise that resolves with `null` once the `ws.send` callback
 *   reports success (or immediately when the socket is not open) and rejects
 *   with the error handed to that callback.
 */
function send(ws: WebSocket, msg: Message): Promise<null> {
  // 1 === OPEN; sending in any other state is skipped.
  if (ws.readyState !== 1) {
    return Promise.resolve(null);
  }

  return new Promise<null>((resolve, reject) => {
    ws.send(JSON.stringify(msg), (err) => {
      if (err) reject(err);
      else resolve(null);
    });
  });
}
|
||||
|
||||
/**
 * Closes `ws` with status `code`, using the JSON-serialized `msg` as the
 * close reason.
 *
 * A close reason may be at most 123 bytes of UTF-8; the `ws` library throws a
 * `RangeError` for anything longer. When the serialized message does not fit,
 * the full payload is first sent as a regular frame (only possible while the
 * socket is OPEN) and the socket is then closed with a compact reason that
 * carries just the message `type`.
 *
 * Does nothing when the socket is already CLOSING or CLOSED.
 *
 * @param ws - Socket to close.
 * @param code - WebSocket close status code.
 * @param msg - Message describing why the socket is being closed.
 */
function close(ws: WebSocket, code: number, msg: Message) {
  // 0 === CONNECTING, 1 === OPEN; anything above is already closing/closed.
  if (ws.readyState > 1) {
    return;
  }

  const maxReasonBytes = 123;
  const payload = JSON.stringify(msg);

  if (new TextEncoder().encode(payload).length <= maxReasonBytes) {
    ws.close(code, payload);
    return;
  }

  // Too long for a close frame: deliver the details as data (best effort),
  // then close with a reason that is guaranteed to fit.
  if (ws.readyState === 1) {
    ws.send(payload);
  }
  ws.close(code, JSON.stringify({ type: msg.type }));
}
|
||||
|
||||
/**
 * Streams the status of one crawl over a WebSocket.
 *
 * Flow:
 *  1. Look up the crawl; close with 1008 if it does not exist, or 3003 if it
 *     belongs to a different team than `req.auth.team_id`.
 *  2. Subscribe to the scrape queue's "completed" events and forward every
 *     newly completed job of this crawl as a "document" message.
 *  3. Send one "catchup" message with the current status and the documents
 *     that were already done.
 *  4. Close with 1000 / "done" once the crawl is no longer "scraping" — either
 *     right after the catchup, or from the listener when the finish marker
 *     is set.
 *
 * The queue listener is removed when the socket closes and when the snapshot
 * phase throws, so it cannot outlive the connection.
 *
 * @param ws - Accepted WebSocket connection.
 * @param req - Upgrade request; `req.params.jobId` is the crawl ID and
 *   `req.auth` must already be populated by the caller.
 */
async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
  const crawlId = req.params.jobId;

  const sc = await getCrawl(crawlId);
  if (!sc) {
    return close(ws, 1008, { type: "error", error: "Job not found" });
  }

  if (sc.team_id !== req.auth.team_id) {
    return close(ws, 3003, { type: "error", error: "Forbidden" });
  }

  // The client may have gone away while the crawl was being looked up; in
  // that case the "close" event has already fired and must not be relied on.
  if (ws.readyState > 1) {
    return;
  }

  // Sends without letting a transport error escape as an unhandled rejection.
  const trySend = async (msg: Message) => {
    try {
      await send(ws, msg);
    } catch (err) {
      Logger.error("Failed to send WebSocket message for crawl " + crawlId + " -- " + String(err));
    }
  };

  let doneJobIDs: string[] = [];

  const completedListener = async (e: { jobId: string }) => {
    try {
      const job = await getScrapeQueue().getJob(e.jobId);
      // The job may already be gone from the queue, or belong to another crawl.
      if (!job || job.data.crawl_id !== crawlId) return;

      // Already part of the catchup snapshot.
      if (doneJobIDs.includes(e.jobId)) return;

      const j = await getJob(e.jobId);
      if (!j || !j.returnvalue) {
        // FAILED (or no longer retrievable)
        return;
      }

      await trySend({
        type: "document",
        data: legacyDocumentConverter(j.returnvalue),
      });

      if (await isCrawlFinishedLocked(crawlId)) {
        await new Promise((resolve) => setTimeout(resolve, 5000)); // wait for last events to pour in
        detach();
        close(ws, 1000, { type: "done" });
      }
    } catch (err) {
      const detail = err instanceof Error ? (err.stack ?? err.message) : String(err);
      Logger.error("Error in crawl status WebSocket listener (" + crawlId + ") -- " + detail);
    }
  };

  const detach = () => {
    scrapeQueueEvents.removeListener("completed", completedListener);
  };

  // TODO: handle failed jobs

  ws.addEventListener("close", detach, { once: true });
  scrapeQueueEvents.addListener("completed", completedListener);

  try {
    doneJobIDs = await getDoneJobsOrdered(crawlId);

    const jobIDs = await getCrawlJobs(crawlId);
    const jobStatuses = await Promise.all(jobIDs.map(x => getScrapeQueue().getJobState(x)));
    const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled
      ? "cancelled"
      : jobStatuses.every(x => x === "completed")
        ? "completed"
        : jobStatuses.some(x => x === "failed")
          ? "failed"
          : "scraping";

    const doneJobs = await getJobs(doneJobIDs);
    const data = doneJobs.map(x => x.returnvalue);

    await trySend({
      type: "catchup",
      data: {
        status,
        totalCount: jobIDs.length,
        creditsUsed: jobIDs.length,
        expiresAt: (await getCrawlExpiry(crawlId)).toISOString(),
        data: data.map(x => legacyDocumentConverter(x)),
      }
    });

    if (status !== "scraping") {
      detach();
      return close(ws, 1000, { type: "done" });
    }
  } catch (err) {
    // Do not leave the queue listener behind if the snapshot could not be built.
    detach();
    throw err;
  }
}
|
||||
|
||||
// Basically just middleware and error wrapping
/**
 * WebSocket entry point for crawl status.
 *
 * Authenticates the request with `authenticateUser` (rate-limited as
 * `RateLimiterMode.CrawlStatus`), stores `{ team_id, plan }` on `req.auth`
 * and hands over to `crawlStatusWS`. Any error thrown along the way is logged
 * under a fresh exception ID and the socket is closed with 1011 and a message
 * quoting that ID.
 *
 * @param ws - Accepted WebSocket connection.
 * @param req - Upgrade request carrying the crawl ID in `req.params.jobId`.
 */
export async function crawlStatusWSController(ws: WebSocket, req: RequestWithAuth<CrawlStatusParams, undefined, undefined>) {
  try {
    const { success, team_id, error, plan } = await authenticateUser(
      req,
      null,
      RateLimiterMode.CrawlStatus,
    );

    if (!success) {
      return close(ws, 3000, {
        type: "error",
        error,
      });
    }

    req.auth = { team_id, plan };

    await crawlStatusWS(ws, req);
  } catch (err) {
    const id = uuidv4();

    // Build the log detail defensively: JSON.stringify throws on circular
    // structures, and nothing may escape from this handler.
    let verbose: string;
    try {
      verbose = JSON.stringify(err);
      if (verbose === "{}" && err instanceof Error) {
        // Error's own fields are non-enumerable, so spell them out.
        verbose = JSON.stringify({
          message: err.message,
          name: err.name,
          stack: err.stack,
        });
      }
    } catch {
      verbose = String(err);
    }

    Logger.error("Error occurred in WebSocket! (" + req.path + ") -- ID " + id + " -- " + verbose);

    try {
      return close(ws, 1011, {
        type: "error",
        error: "An unexpected error occurred. Please contact hello@firecrawl.com for help. Your exception ID is " + id
      });
    } catch (closeErr) {
      // Closing with a reason failed; still close, with the status code only.
      Logger.error("Failed to close WebSocket with reason -- ID " + id + " -- " + String(closeErr));
      ws.close(1011);
    }
  }
}
|
|
@ -4,7 +4,24 @@ import { getCrawl, getCrawlExpiry, getCrawlJobs, getDoneJobsOrdered, getDoneJobs
|
|||
import { getScrapeQueue } from "../../services/queue-service";
|
||||
import { supabaseGetJobById, supabaseGetJobsById } from "../../lib/supabase-jobs";
|
||||
|
||||
async function getJobs(ids: string[]) {
|
||||
/**
 * Loads one scrape job from the queue and normalizes its `returnvalue`.
 *
 * When `USE_DB_AUTHENTICATION` is "true" and Supabase has a record for the
 * job, the record's `docs` replace the queue's `returnvalue`. An array
 * `returnvalue` is then collapsed to its first element.
 *
 * @param id - Scrape job ID.
 * @returns The job with the adjusted `returnvalue`, or the falsy lookup
 *   result unchanged when the queue has no such job.
 */
export async function getJob(id: string) {
  const job = await getScrapeQueue().getJob(id);
  if (!job) {
    return job;
  }

  const useDb = process.env.USE_DB_AUTHENTICATION === "true";
  const supabaseData = useDb ? await supabaseGetJobById(id) : undefined;
  if (supabaseData) {
    job.returnvalue = supabaseData.docs;
  }

  if (Array.isArray(job.returnvalue)) {
    job.returnvalue = job.returnvalue[0];
  }

  return job;
}
|
||||
|
||||
export async function getJobs(ids: string[]) {
|
||||
const jobs = (await Promise.all(ids.map(x => getScrapeQueue().getJob(x)))).filter(x => x);
|
||||
|
||||
if (process.env.USE_DB_AUTHENTICATION === "true") {
|
||||
|
|
|
@ -14,6 +14,8 @@ import http from 'node:http';
|
|||
import https from 'node:https';
|
||||
import CacheableLookup from 'cacheable-lookup';
|
||||
import { v1Router } from "./routes/v1";
|
||||
import expressWs from "express-ws";
|
||||
import { crawlStatusWSController } from "./controllers/v1/crawl-status-ws";
|
||||
|
||||
const { createBullBoard } = require("@bull-board/api");
|
||||
const { BullAdapter } = require("@bull-board/api/bullAdapter");
|
||||
|
@ -46,7 +48,8 @@ if (cluster.isMaster) {
|
|||
}
|
||||
});
|
||||
} else {
|
||||
const app = express();
|
||||
const ws = expressWs(express());
|
||||
const app = ws.app;
|
||||
|
||||
global.isProduction = process.env.IS_PRODUCTION === "true";
|
||||
|
||||
|
@ -79,7 +82,7 @@ if (cluster.isMaster) {
|
|||
|
||||
// register router
|
||||
app.use(v0Router);
|
||||
app.use(v1Router);
|
||||
app.use("/v1", v1Router);
|
||||
app.use(adminRouter);
|
||||
|
||||
const DEFAULT_PORT = process.env.PORT ?? 3002;
|
||||
|
|
|
@ -63,6 +63,10 @@ export async function isCrawlFinished(id: string) {
|
|||
return (await redisConnection.scard("crawl:" + id + ":jobs_done")) === (await redisConnection.scard("crawl:" + id + ":jobs"));
|
||||
}
|
||||
|
||||
/**
 * Reports whether the finish marker key `crawl:<id>:finish` exists in Redis.
 *
 * The raw `EXISTS` reply is normalized to a boolean so this predicate has the
 * same return type as `isCrawlFinished`.
 *
 * @param id - Crawl ID.
 * @returns `true` when the marker key exists, otherwise `false`.
 */
export async function isCrawlFinishedLocked(id: string): Promise<boolean> {
  const existing = await redisConnection.exists("crawl:" + id + ":finish");
  return Boolean(existing);
}
|
||||
|
||||
export async function finishCrawl(id: string) {
|
||||
if (await isCrawlFinished(id)) {
|
||||
const set = await redisConnection.setnx("crawl:" + id + ":finish", "yes");
|
||||
|
|
|
@ -13,6 +13,8 @@ import { validateIdempotencyKey } from "../services/idempotency/validate";
|
|||
import { ZodError } from "zod";
|
||||
import { checkTeamCredits } from "../services/billing/credit_billing";
|
||||
import { v4 as uuidv4 } from "uuid";
|
||||
import expressWs from "express-ws";
|
||||
import { crawlStatusWSController } from "../controllers/v1/crawl-status-ws";
|
||||
// import { crawlPreviewController } from "../../src/controllers/v1/crawlPreview";
|
||||
// import { crawlJobStatusPreviewController } from "../../src/controllers/v1/status";
|
||||
// import { searchController } from "../../src/controllers/v1/search";
|
||||
|
@ -33,7 +35,7 @@ function checkCreditsMiddleware(minimum: number): (req: RequestWithAuth, res: Re
|
|||
};
|
||||
}
|
||||
|
||||
function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
|
||||
export function authMiddleware(rateLimiterMode: RateLimiterMode): (req: RequestWithMaybeAuth, res: Response, next: NextFunction) => void {
|
||||
return (req, res, next) => {
|
||||
(async () => {
|
||||
const { success, team_id, error, status, plan } = await authenticateUser(
|
||||
|
@ -74,17 +76,19 @@ function wrap(controller: (req: Request, res: Response) => Promise<any>): (req:
|
|||
}
|
||||
}
|
||||
|
||||
expressWs(express());
|
||||
|
||||
export const v1Router = express.Router();
|
||||
|
||||
v1Router.post(
|
||||
"/v1/scrape",
|
||||
"/scrape",
|
||||
authMiddleware(RateLimiterMode.Scrape),
|
||||
checkCreditsMiddleware(1),
|
||||
wrap(scrapeController)
|
||||
);
|
||||
|
||||
v1Router.post(
|
||||
"/v1/crawl",
|
||||
"/crawl",
|
||||
authMiddleware(RateLimiterMode.Crawl),
|
||||
idempotencyMiddleware,
|
||||
checkCreditsMiddleware(1),
|
||||
|
@ -92,31 +96,36 @@ v1Router.post(
|
|||
);
|
||||
|
||||
v1Router.post(
|
||||
"/v1/map",
|
||||
"/map",
|
||||
authMiddleware(RateLimiterMode.Crawl),
|
||||
checkCreditsMiddleware(1),
|
||||
wrap(mapController)
|
||||
);
|
||||
|
||||
v1Router.get(
|
||||
"/v1/crawl/:jobId",
|
||||
"/crawl/:jobId",
|
||||
authMiddleware(RateLimiterMode.CrawlStatus),
|
||||
wrap(crawlStatusController)
|
||||
);
|
||||
|
||||
// v1Router.post("/v1/crawlWebsitePreview", crawlPreviewController);
|
||||
// v1Router.delete("/v1/crawl/:jobId", crawlCancelController);
|
||||
// v1Router.get("/v1/checkJobStatus/:jobId", crawlJobStatusPreviewController);
|
||||
v1Router.ws(
|
||||
"/crawl/:jobId",
|
||||
crawlStatusWSController
|
||||
);
|
||||
|
||||
// v1Router.post("/crawlWebsitePreview", crawlPreviewController);
|
||||
// v1Router.delete("/crawl/:jobId", crawlCancelController);
|
||||
// v1Router.get("/checkJobStatus/:jobId", crawlJobStatusPreviewController);
|
||||
|
||||
// // Auth route for key based authentication
|
||||
// v1Router.get("/v1/keyAuth", keyAuthController);
|
||||
// v1Router.get("/keyAuth", keyAuthController);
|
||||
|
||||
// // Search routes
|
||||
// v0Router.post("/v1/search", searchController);
|
||||
// v0Router.post("/search", searchController);
|
||||
|
||||
// Health/Probe routes
|
||||
// v1Router.get("/v1/health/liveness", livenessController);
|
||||
// v1Router.get("/v1/health/readiness", readinessController);
|
||||
// v1Router.get("/health/liveness", livenessController);
|
||||
// v1Router.get("/health/readiness", readinessController);
|
||||
|
||||
v1Router.use((err: unknown, req: Request<{}, ErrorResponse, undefined>, res: Response<ErrorResponse>, next: NextFunction) => {
|
||||
if (err instanceof ZodError) {
|
||||
|
|
Loading…
Reference in New Issue
Block a user