Mirror of https://github.com/mendableai/firecrawl.git (synced 2024-11-16 03:32:22 +08:00)

Commit message: Nick:
Parent: 19cba43ee4
Commit: 0db0874b00

@@ -42,6 +42,7 @@ export async function crawlController(req: Request, res: Response) {
          returnOnlyUrls: true,
        },
        pageOptions: pageOptions,
      });

      const docs = await a.getDocuments(false, (progress) => {

@@ -67,6 +68,7 @@ export async function crawlController(req: Request, res: Response) {
      crawlerOptions: { ...crawlerOptions },
      team_id: team_id,
      pageOptions: pageOptions,
      origin: req.body.origin ?? "api",
    });

    res.json({ jobId: job.id });

@@ -21,12 +21,14 @@ export async function crawlPreviewController(req: Request, res: Response) {
    const mode = req.body.mode ?? "crawl";
    const crawlerOptions = req.body.crawlerOptions ?? {};
    const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };

    const job = await addWebScraperJob({
      url: url,
      mode: mode ?? "crawl", // fix for single urls not working
      crawlerOptions: { ...crawlerOptions, limit: 5, maxCrawledLinks: 5 },
      team_id: "preview",
      pageOptions: pageOptions,
      origin: "website-preview",
    });

    res.json({ jobId: job.id });

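Not part of the commit, just for orientation: a minimal sketch of a client request that exercises the pageOptions field the controllers above now forward to the scrape job. The endpoint URL, auth header, and key format are assumptions; the body fields and the { jobId } response shape come from the controller code.

// Hypothetical client call (sketch only); endpoint and auth are assumptions.
const response = await fetch("https://api.firecrawl.dev/v0/crawl", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    Authorization: "Bearer fc-YOUR_API_KEY",
  },
  body: JSON.stringify({
    url: "https://example.com",
    crawlerOptions: { limit: 5, maxCrawledLinks: 5 }, // same knobs the preview route pins
    pageOptions: { onlyMainContent: true },           // the option this commit forwards to the job
    origin: "api",                                    // matches the req.body.origin ?? "api" default
  }),
});
const { jobId } = await response.json();              // controllers respond with { jobId: job.id }
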
@@ -72,6 +72,7 @@ export async function scrapeController(req: Request, res: Response) {
  }
  const crawlerOptions = req.body.crawlerOptions ?? {};
  const pageOptions = req.body.pageOptions ?? { onlyMainContent: false };
  const origin = req.body.origin ?? "api";

  try {
    const { success: creditsCheckSuccess, message: creditsCheckMessage } =

@@ -83,24 +84,27 @@ export async function scrapeController(req: Request, res: Response) {
      console.error(error);
      return res.status(500).json({ error: "Internal server error" });
    }

    const startTime = new Date().getTime();
    const result = await scrapeHelper(
      req,
      team_id,
      crawlerOptions,
      pageOptions
    );
    const endTime = new Date().getTime();
    const timeTakenInSeconds = (endTime - startTime) / 1000;
    logJob({
      success: result.success,
      message: result.error,
      num_docs: 1,
      docs: [result.data],
-     time_taken: 0,
+     time_taken: timeTakenInSeconds,
      team_id: team_id,
      mode: "scrape",
      url: req.body.url,
      crawlerOptions: crawlerOptions,
      pageOptions: pageOptions,
      origin: origin,
    });
    return res.status(result.returnCode).json(result);
  } catch (error) {

@@ -44,7 +44,11 @@ export async function runWebScraper({
  onSuccess: (result: any) => void;
  onError: (error: any) => void;
  team_id: string;
- }): Promise<{ success: boolean; message: string; docs: CrawlResult[] }> {
+ }): Promise<{
+   success: boolean;
+   message: string;
+   docs: CrawlResult[];
+ }> {
  try {
    const provider = new WebScraperDataProvider();
    if (mode === "crawl") {

@@ -70,7 +74,7 @@ export async function runWebScraper({
      return {
        success: true,
        message: "No pages found",
-       docs: [],
+       docs: []
      };
    }

@@ -87,7 +91,7 @@ export async function runWebScraper({
      return {
        success: false,
        message: "Failed to bill team, no subscription was found",
-       docs: [],
+       docs: []
      };
    }

@@ -17,11 +17,12 @@ export async function logJob(job: FirecrawlJob) {
        num_docs: job.num_docs,
        docs: job.docs,
        time_taken: job.time_taken,
-       team_id: job.team_id,
+       team_id: job.team_id === "preview" ? null : job.team_id,
        mode: job.mode,
        url: job.url,
        crawler_options: job.crawlerOptions,
        page_options: job.pageOptions,
        origin: job.origin,
      },
    ]);
    if (error) {

@@ -17,10 +17,11 @@ getWebScraperQueue().process(
        current_url: "",
      });
      const start = Date.now();
      console.log("Processing job", job.data);
      const { success, message, docs } = await startWebScraperPipeline({ job });
      const end = Date.now();
      const timeTakenInSeconds = (end - start) / 1000;

      const data = {
        success: success,
        result: {

@@ -33,7 +34,7 @@ getWebScraperQueue().process(
      };

      await callWebhook(job.data.team_id, data);

      await logJob({
        success: success,
        message: message,

@@ -45,6 +46,7 @@ getWebScraperQueue().process(
        url: job.data.url,
        crawlerOptions: job.data.crawlerOptions,
        pageOptions: job.data.pageOptions,
        origin: job.data.origin,
      });
      done(null, data);
    } catch (error) {

@@ -1,6 +1,7 @@
import { supabase_service } from "./supabase";

export const callWebhook = async (teamId: string, data: any) => {
  try {
    const { data: webhooksData, error } = await supabase_service
      .from('webhooks')
      .select('url')

@@ -37,5 +38,9 @@ export const callWebhook = async (teamId: string, data: any) => {
          data: dataToSend,
          error: data.error || undefined,
        }),
      });
    }
    });
  } catch (error) {
    console.error(`Error sending webhook for team ID: ${teamId}`, error.message);
  }
};

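Also not part of the commit: the rough shape of the POST body that callWebhook sends, inferred only from the JSON.stringify fields visible in the hunk above. Field types, optionality, and the success flag are assumptions.

// Payload shape implied by the webhook call above (sketch; types are assumptions).
type WebhookPayload = {
  success?: boolean;   // assumed to mirror the job's success flag
  data: unknown;       // "dataToSend" in the source
  error?: string;      // "data.error || undefined" in the source
};

// A hypothetical receiver, for illustration only (framework and route are assumptions):
// app.post("/firecrawl-webhook", (req, res) => {
//   const payload = req.body as WebhookPayload;
//   if (payload.error) console.error("Job failed:", payload.error);
//   res.sendStatus(200);
// });
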
@@ -22,6 +22,7 @@ export interface WebScraperOptions {
  crawlerOptions: any;
  pageOptions: any;
  team_id: string;
  origin?: string;
}

@@ -36,6 +37,7 @@ export interface FirecrawlJob {
  url: string;
  crawlerOptions?: any;
  pageOptions?: any;
  origin: string;
}