Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 03:32:22 +08:00)
feat(sentry): add trace continuity for queue
parent 6d92b8524d
commit 6d48dbcd38
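
Distilled, the trace continuity added by this commit follows the usual producer/consumer propagation pattern: the producer wraps each enqueue in a queue.publish span and serializes its trace context into the job payload, and the worker restores that context with Sentry.continueTrace before opening its own processing span. The sketch below only illustrates that pattern, it is not the firecrawl code itself; the helper names enqueueWithTrace and processWithTrace are invented for the example, and it assumes the v8-style @sentry/node APIs and bullmq types the diff already uses.

import { Queue, Job } from "bullmq";
import * as Sentry from "@sentry/node";

// Producer side: open a queue.publish span and attach its trace headers to the payload.
async function enqueueWithTrace(queue: Queue, name: string, data: any): Promise<Job> {
  return await Sentry.startSpan({ name: "Add job", op: "queue.publish" }, async (span) =>
    queue.add(name, {
      ...data,
      sentry: {
        trace: Sentry.spanToTraceHeader(span),     // sentry-trace header value
        baggage: Sentry.spanToBaggageHeader(span), // baggage header value (may be undefined)
      },
    })
  );
}

// Consumer side: continue the producer's trace, then record processing as queue.process.
async function processWithTrace(job: Job, handler: (job: Job) => Promise<void>) {
  await Sentry.continueTrace(
    { sentryTrace: job.data.sentry?.trace, baggage: job.data.sentry?.baggage },
    () => Sentry.startSpan({ name: "Process job", op: "queue.process" }, () => handler(job))
  );
}
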
@@ -194,7 +194,14 @@ export async function crawlController(req: Request, res: Response) {
         id,
         jobs.map((x) => x.opts.jobId)
       );
-      await getScrapeQueue().addBulk(jobs);
+      if (Sentry.isInitialized()) {
+        for (const job of jobs) {
+          // add with sentry instrumentation
+          await addScrapeJob(job.data as any, {}, job.opts.jobId);
+        }
+      } else {
+        await getScrapeQueue().addBulk(jobs);
+      }
     } else {
       await lockURL(id, sc, url);
       const job = await addScrapeJob(
@@ -50,12 +50,12 @@ export async function scrapeHelper(
 
   let doc;
 
-  const err = await Sentry.startSpanManual({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
+  const err = await Sentry.startSpan({ name: "Wait for job to finish", op: "bullmq.wait", attributes: { job: jobId } }, async (span) => {
     try {
       doc = (await job.waitUntilFinished(scrapeQueueEvents, timeout))[0]
     } catch (e) {
       if (e instanceof Error && e.message.startsWith("Job wait")) {
-        span.setAttribute("timedOut", true).end();
+        span.setAttribute("timedOut", true);
         return {
           success: false,
           error: "Request timed out",
@@ -65,7 +65,7 @@ export async function scrapeHelper(
         throw e;
       }
     }
-    span.setAttribute("result", JSON.stringify(doc)).end();
+    span.setAttribute("result", JSON.stringify(doc));
     return null;
   });
 
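
One note on the scrapeHelper change above: Sentry.startSpan ends the span automatically when its callback settles, while startSpanManual leaves ending it to the caller, which is why the chained .end() calls are dropped and only the attribute updates remain. A minimal sketch of the difference, assuming the same v8 @sentry/node API:

import * as Sentry from "@sentry/node";

// startSpanManual: the caller is responsible for ending the span.
Sentry.startSpanManual({ name: "manual work" }, (span) => {
  span.setAttribute("ok", true);
  span.end(); // must be called explicitly
});

// startSpan: the span is ended for you when the callback returns (or its promise settles).
Sentry.startSpan({ name: "scoped work" }, (span) => {
  span.setAttribute("ok", true);
  // no span.end() needed
});
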
@@ -1,8 +1,6 @@
 import { Request, Response } from "express";
 import { Logger } from "../../src/lib/logger";
 import { getCrawl, getCrawlJobs } from "../../src/lib/crawl-redis";
-import { getScrapeQueue } from "../../src/services/queue-service";
-import { supabaseGetJobById } from "../../src/lib/supabase-jobs";
 import { getJobs } from "./crawl-status";
 import * as Sentry from "@sentry/node";
 
@@ -12,7 +12,6 @@ import { Document } from "../lib/entities";
 import { supabase_service } from "../services/supabase";
 import { Logger } from "../lib/logger";
 import { ScrapeEvents } from "../lib/scrape-events";
-import { getScrapeQueue } from "../services/queue-service";
 
 export async function startWebScraperPipeline({
   job,
@@ -16,7 +16,6 @@ import {
   replacePathsWithAbsolutePaths,
 } from "./utils/replacePaths";
 import { generateCompletions } from "../../lib/LLM-extraction";
-import { getScrapeQueue } from "../../../src/services/queue-service";
 import { fetchAndProcessDocx } from "./utils/docxProcessor";
 import { getAdjustedMaxDepth, getURLDepth } from "./utils/maxDepthUtils";
 import { Logger } from "../../lib/logger";
@@ -2,11 +2,12 @@ import { Job, Queue } from "bullmq";
 import { getScrapeQueue } from "./queue-service";
 import { v4 as uuidv4 } from "uuid";
 import { WebScraperOptions } from "../types";
+import * as Sentry from "@sentry/node";
 
-export async function addScrapeJob(
-  webScraperOptions: WebScraperOptions,
-  options: any = {},
-  jobId: string = uuidv4(),
+async function addScrapeJobRaw(
+  webScraperOptions: any,
+  options: any,
+  jobId: string,
 ): Promise<Job> {
   return await getScrapeQueue().add(jobId, webScraperOptions, {
     priority: webScraperOptions.crawl_id ? 20 : 10,
@@ -15,3 +16,33 @@ export async function addScrapeJob(
   });
 }
 
+export async function addScrapeJob(
+  webScraperOptions: WebScraperOptions,
+  options: any = {},
+  jobId: string = uuidv4(),
+): Promise<Job> {
+  if (Sentry.isInitialized()) {
+    const size = JSON.stringify(webScraperOptions).length;
+    return await Sentry.startSpan({
+      name: "Add scrape job",
+      op: "queue.publish",
+      attributes: {
+        "messaging.message.id": jobId,
+        "messaging.destination.name": getScrapeQueue().name,
+        "messaging.message.body.size": size,
+      },
+    }, async (span) => {
+      return await addScrapeJobRaw({
+        ...webScraperOptions,
+        sentry: {
+          trace: Sentry.spanToTraceHeader(span),
+          baggage: Sentry.spanToBaggageHeader(span),
+          size,
+        },
+      }, options, jobId);
+    });
+  } else {
+    return await addScrapeJobRaw(webScraperOptions, options, jobId);
+  }
+}
+
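
For reference, the extra field addScrapeJob now attaches to the job data could be described with a type like the one below. The interface is hypothetical (the diff passes the object inline and never declares a type); it only documents the shape the worker reads back.

// Hypothetical shape of the `sentry` field added to job data when Sentry is initialized.
interface SentryJobMeta {
  trace: string;     // Sentry.spanToTraceHeader(span): the sentry-trace header value
  baggage?: string;  // Sentry.spanToBaggageHeader(span): may be undefined if there is no baggage
  size: number;      // JSON.stringify(webScraperOptions).length, reported as messaging.message.body.size
}
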
@@ -50,6 +50,7 @@ const processJobInternal = async (token: string, job: Job) => {
     await job.extendLock(token, jobLockExtensionTime);
   }, jobLockExtendInterval);
 
+  let err = null;
   try {
     const result = await processJob(job, token);
     try{
@@ -62,11 +63,14 @@ const processJobInternal = async (token: string, job: Job) => {
     }
   } catch (error) {
     console.log("Job failed, error:", error);
+    Sentry.captureException(error);
+    err = error;
     await job.moveToFailed(error, token, false);
   } finally {
     clearInterval(extendLockInterval);
   }
 
+  return err;
 };
 
 let isShuttingDown = false;
@@ -76,7 +80,7 @@ process.on("SIGINT", () => {
   isShuttingDown = true;
 });
 
-const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<void>) => {
+const workerFun = async (queueName: string, processJobInternal: (token: string, job: Job) => Promise<any>) => {
   const worker = new Worker(queueName, null, {
     connection: redisConnection,
     lockDuration: 1 * 60 * 1000, // 1 minute
@@ -104,16 +108,47 @@ const workerFun = async (queueName: string, processJobInternal: (token: string,
 
     const job = await worker.getNextJob(token);
     if (job) {
-      Sentry.startSpan({
-        name: "Scrape job",
-        op: "bullmq.job",
-        attributes: {
-          job: job.id,
-          worker: process.env.FLY_MACHINE_ID ?? worker.id,
-        },
-      }, async () => {
-        await processJobInternal(token, job);
-      });
+      if (job.data && job.data.sentry && Sentry.isInitialized()) {
+        Sentry.continueTrace({ sentryTrace: job.data.sentry.trace, baggage: job.data.sentry.baggage }, () => {
+          Sentry.startSpan({
+            name: "Scrape job",
+            attributes: {
+              job: job.id,
+              worker: process.env.FLY_MACHINE_ID ?? worker.id,
+            },
+          }, async (span) => {
+            await Sentry.startSpan({
+              name: "Process scrape job",
+              op: "queue.process",
+              attributes: {
+                "messaging.message.id": job.id,
+                "messaging.destination.name": getScrapeQueue().name,
+                "messaging.message.body.size": job.data.sentry.size,
+                "messaging.message.receive.latency": Date.now() - (job.processedOn ?? job.timestamp),
+                "messaging.message.retry.count": job.attemptsMade,
+              }
+            }, async () => {
+              const res = await processJobInternal(token, job);
+              if (res !== null) {
+                span.setStatus({ code: 2 }); // ERROR
+              } else {
+                span.setStatus({ code: 1 }); // OK
+              }
+            });
+          });
+        });
+      } else {
+        Sentry.startSpan({
+          name: "Scrape job",
+          attributes: {
+            job: job.id,
+            worker: process.env.FLY_MACHINE_ID ?? worker.id,
+          },
+        }, () => {
+          processJobInternal(token, job);
+        });
+      }
+
       await sleep(gotJobInterval);
     } else {
       await sleep(connectionMonitorInterval);
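
processJobInternal now returns the caught error (or null) so the surrounding span can record the outcome; the numeric codes passed to span.setStatus are the SDK's span status values (1 = OK, 2 = ERROR, as the inline comments note). A stripped-down sketch of that flow, with a hypothetical runInstrumented helper and the same literal status codes:

import * as Sentry from "@sentry/node";

// Illustrative only: run a unit of work inside a span and mark the span
// according to whether the work reported an error.
async function runInstrumented(work: () => Promise<Error | null>): Promise<void> {
  await Sentry.startSpan({ name: "Scrape job" }, async (span) => {
    const err = await work();                                 // mirrors processJobInternal returning err
    span.setStatus(err !== null ? { code: 2 } : { code: 1 }); // 2 = ERROR, 1 = OK
  });
}
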
@@ -10,8 +10,9 @@ if (process.env.SENTRY_DSN) {
     integrations: [
       nodeProfilingIntegration(),
     ],
-    tracesSampleRate: 0.045,
+    tracesSampleRate: process.env.SENTRY_ENVIRONMENT === "dev" ? 1.0 : 0.045,
     profilesSampleRate: 1.0,
     serverName: process.env.FLY_MACHINE_ID,
+    environment: process.env.SENTRY_ENVIRONMENT ?? "production",
   });
 }