Performance improvement for the crawl-status WebSocket (ws) handler

This commit is contained in:
rafaelsideguide 2024-10-15 10:12:27 -03:00
parent 3afaab13d9
commit 4afcd16e02

View File

@ -97,12 +97,23 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth<CrawlStatusPara
let jobIDs = await getCrawlJobs(req.params.jobId);
let jobStatuses = await Promise.all(jobIDs.map(async x => [x, await getScrapeQueue().getJobState(x)] as const));
const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id));
jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed
// filter out failed jobs
jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed"));
// filter the job statuses
jobStatuses = jobStatuses.filter(x => x[1] !== "failed" && x[1] !== "unknown");
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
const throttledJobsSet = new Set(throttledJobs);
const validJobStatuses = [];
const validJobIDs = [];
for (const [id, status] of jobStatuses) {
if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") {
validJobStatuses.push([id, status]);
validJobIDs.push(id);
}
}
const status: Exclude<CrawlStatusResponse, ErrorResponse>["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping";
jobIDs = validJobIDs; // Use validJobIDs instead of jobIDs for further processing
const doneJobs = await getJobs(doneJobIDs);
const data = doneJobs.map(x => x.returnvalue);