From 4afcd16e02ccd7f36c9d73dbff80102329adf596 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Tue, 15 Oct 2024 10:12:27 -0300 Subject: [PATCH] performance improv for ws --- .../api/src/controllers/v1/crawl-status-ws.ts | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/apps/api/src/controllers/v1/crawl-status-ws.ts b/apps/api/src/controllers/v1/crawl-status-ws.ts index 0d6d5803..3738e3a2 100644 --- a/apps/api/src/controllers/v1/crawl-status-ws.ts +++ b/apps/api/src/controllers/v1/crawl-status-ws.ts @@ -97,12 +97,23 @@ async function crawlStatusWS(ws: WebSocket, req: RequestWithAuth [x, await getScrapeQueue().getJobState(x)] as const)); const throttledJobs = new Set(...await getThrottledJobs(req.auth.team_id)); - jobStatuses = jobStatuses.filter(x => !throttledJobs.has(x[0])); // throttled jobs can have a failed status, but they are not actually failed - // filter out failed jobs - jobIDs = jobIDs.filter(id => !jobStatuses.some(status => status[0] === id && status[1] === "failed")); - // filter the job statues - jobStatuses = jobStatuses.filter(x => x[1] !== "failed" && x[1] !== "unknown"); - const status: Exclude["status"] = sc.cancelled ? "cancelled" : jobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping"; + + const throttledJobsSet = new Set(throttledJobs); + + const validJobStatuses = []; + const validJobIDs = []; + + for (const [id, status] of jobStatuses) { + if (!throttledJobsSet.has(id) && status !== "failed" && status !== "unknown") { + validJobStatuses.push([id, status]); + validJobIDs.push(id); + } + } + + const status: Exclude["status"] = sc.cancelled ? "cancelled" : validJobStatuses.every(x => x[1] === "completed") ? "completed" : "scraping"; + + jobIDs = validJobIDs; // Use validJobIDs instead of jobIDs for further processing + const doneJobs = await getJobs(doneJobIDs); const data = doneJobs.map(x => x.returnvalue);