mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
Merge pull request #780 from mendableai/feat/improv-filter-perfomance
[Feat] Performance improvements crawl status filters
This commit is contained in:
commit
784aa789cb
|
@ -97,12 +97,23 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusPara
|
||||||
let jobIDs = await getCrawlJobs(req.params.jobId);
|
let jobIDs = await getCrawlJobs(req.params.jobId);
|
||||||
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
||||||
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
||||||
jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
|
|
||||||
// filter out failed jobs
|
const throttledJobsSet = new Set(throttledJobs);
|
||||||
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
|
|
||||||
// filter the job statues
|
const validJobStatuses = [];
|
||||||
jobStatuses = jobStatuses.filter(x => x[1] !== "failed" && x[1] !== "unknown");
|
const validJobIDs = [];
|
||||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
|
||||||
|
for (const [id, status] of jobStatuses) {
|
||||||
|
if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") {
|
||||||
|
validJobStatuses.push([id, status]);
|
||||||
|
validJobIDs.push(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
||||||
|
|
||||||
|
jobIDs = validJobIDs; // Use validJobIDs instead of jobIDs for further processing
|
||||||
|
|
||||||
const doneJobs = await getJobs(doneJobIDs);
|
const doneJobs = await getJobs(doneJobIDs);
|
||||||
const data = doneJobs.map(x => x.returnvalue);
|
const data = doneJobs.map(x => x.returnvalue);
|
||||||
|
|
||||||
|
|
|
@ -60,12 +60,24 @@ export async function crawlStatusController(req: RequestWithAuth<CrawlStatusPara
|
||||||
let jobIDs = await getCrawlJobs(req.params.jobId);
|
let jobIDs = await getCrawlJobs(req.params.jobId);
|
||||||
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
|
||||||
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
|
||||||
jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
|
|
||||||
// filter out failed jobs
|
const throttledJobsSet = new Set(throttledJobs);
|
||||||
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
|
|
||||||
// filter the job statues
|
const validJobStatuses = [];
|
||||||
jobStatuses = jobStatuses.filter(x => x[1] !== "failed" && x[1] !== "unknown");
|
const validJobIDs = [];
|
||||||
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
|
||||||
|
for (const [id, status] of jobStatuses) {
|
||||||
|
if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") {
|
||||||
|
validJobStatuses.push([id, status]);
|
||||||
|
validJobIDs.push(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
|
||||||
|
|
||||||
|
// Use validJobIDs instead of jobIDs for further processing
|
||||||
|
jobIDs = validJobIDs;
|
||||||
|
|
||||||
const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
|
const doneJobsLength = await getDoneJobsOrderedLength(req.params.jobId);
|
||||||
const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);
|
const doneJobsOrder = await getDoneJobsOrdered(req.params.jobId, start, end ?? -1);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user