This commit is contained in:
Nicolas 2024-04-20 19:37:45 -07:00
parent 19cba43ee4
commit 0db0874b00
8 changed files with 32 additions and 10 deletions

View File

@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) {
returnOnlyUrls: true,
},
pageOptions: pageOptions,
});
const docs = await a.getDocuments(false, (progress) => {
@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) {
crawlerOptions: { ...crawlerOptions },
team_id: team_id,
pageOptions: pageOptions,
origin: req.body.origin ?? "api",
});
res.json({ jobId: job.id });

View File

@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) {
const mode = req.body.mode ?? "crawl";
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
const job = await addWebScraperJob({
url: url,
mode: mode ?? "crawl", // fix for single urls not working
crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 },
team_id: "preview",
pageOptions: pageOptions,
origin: "website-preview",
});
res.json({ jobId: job.id });

View File

@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) {
}
const crawlerOptions = req.body.crawlerOptions ?? {};
const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
const origin = req.body.origin ?? "api";
try {
const { success: creditsCheckSuccess, message: creditsCheckMessage } =
@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) {
console.error(error);
return res.status(500).json({ error: "Internal server error" });
}
const startTime = new Date().getTime();
const result = await scrapeHelper(
req,
team_id,
crawlerOptions,
pageOptions
);
const endTime = new Date().getTime();
const timeTakenInSeconds = (endTime - startTime) / 1000;
logJob({
success: result.success,
message: result.error,
num_docs: 1,
docs: [result.data],
- time_taken: 0,
+ time_taken: timeTakenInSeconds,
team_id: team_id,
mode: "scrape",
url: req.body.url,
crawlerOptions: crawlerOptions,
pageOptions: pageOptions,
origin: origin,
});
return res.status(result.returnCode).json(result);
} catch (error) {

View File

@ -44,7 +44,11 @@ export async function runWebScraper({
onSuccess: (result: any) => void;
onError: (error: any) => void;
team_id: string;
- }): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> {
+ }): Promise<{
+ success: boolean;
+ message: string;
+ docs: CrawlResult[];
+ }> {
try {
const provider = new WebScraperDataProvider();
if (mode === "crawl") {
@ -70,7 +74,7 @@ export async function runWebScraper({
return {
success: true,
message: "No pages found",
- docs: [],
+ docs: []
};
}
@ -87,7 +91,7 @@ export async function runWebScraper({
return {
success: false,
message: "Failed to bill team, no subscription was found",
- docs: [],
+ docs: []
};
}

View File

@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) {
num_docs: job.num_docs,
docs: job.docs,
time_taken: job.time_taken,
- team_id: job.team_id,
+ team_id: job.team_id === "preview" ? null : job.team_id,
mode: job.mode,
url: job.url,
crawler_options: job.crawlerOptions,
page_options: job.pageOptions,
origin: job.origin,
},
]);
if (error) {

View File

@ -17,10 +17,11 @@ getWebScraperQueue().process(
current_url: "",
});
const start = Date.now();
console.log("Processing job", job.data);
const { success, message, docs } = await startWebScraperPipeline({ job });
const end = Date.now();
const timeTakenInSeconds = (end - start) / 1000;
const data = {
success: success,
result: {
@ -33,7 +34,7 @@ getWebScraperQueue().process(
};
await callWebhook(job.data.team_id, data);
await logJob({
success: success,
message: message,
@ -45,6 +46,7 @@ getWebScraperQueue().process(
url: job.data.url,
crawlerOptions: job.data.crawlerOptions,
pageOptions: job.data.pageOptions,
origin: job.data.origin,
});
done(null, data);
} catch (error) {

View File

@ -1,6 +1,7 @@
import { supabase_service } from "./supabase";
export const callWebhook = async (teamId: string, data: any) => {
try {
const { data: webhooksData, error } = await supabase_service
.from('webhooks')
.select('url')
@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => {
data: dataToSend,
error: data.error || undefined,
}),
});
}
});
} catch (error) {
console.error(`Error sending webhook for team ID: ${teamId}`, error.message);
}
};

View File

@ -22,6 +22,7 @@ export interface WebScraperOptions {
crawlerOptions: any;
pageOptions: any;
team_id: string;
origin?: string;
}
@ -36,6 +37,7 @@ export interface FirecrawlJob {
url: string;
crawlerOptions?: any;
pageOptions?: any;
origin: string;
}