From 32be2cf786294b8722f85eb45ba3202236fb885d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20M=C3=B3ricz?= Date: Wed, 13 Nov 2024 19:36:44 +0100 Subject: [PATCH] feat(v1/webhook): complex webhook object w/ headers (#899) * feat(v1/webhook): complex webhook object w/ headers * feat(js-sdk/crawl): add complex webhook support --- apps/api/src/controllers/v1/types.ts | 13 ++++++++++++- apps/api/src/services/webhook.ts | 14 +++++++++----- apps/api/src/types.ts | 5 +++-- apps/js-sdk/firecrawl/src/index.ts | 5 ++++- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/apps/api/src/controllers/v1/types.ts b/apps/api/src/controllers/v1/types.ts index ec78509a..b2edd6e7 100644 --- a/apps/api/src/controllers/v1/types.ts +++ b/apps/api/src/controllers/v1/types.ts @@ -220,11 +220,22 @@ const crawlerOptions = z.object({ export type CrawlerOptions = z.infer<typeof crawlerOptions>; +export const webhookSchema = z.preprocess(x => { + if (typeof x === "string") { + return { url: x }; + } else { + return x; + } +}, z.object({ + url: z.string().url(), + headers: z.record(z.string(), z.string()).default({}), +}).strict(strictMessage)) + export const crawlRequestSchema = crawlerOptions.extend({ url, origin: z.string().optional().default("api"), scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}), - webhook: z.string().url().optional(), + webhook: webhookSchema.optional(), limit: z.number().default(10000), }).strict(strictMessage); diff --git a/apps/api/src/services/webhook.ts b/apps/api/src/services/webhook.ts index 620b6832..1cc4db84 100644 --- a/apps/api/src/services/webhook.ts +++ b/apps/api/src/services/webhook.ts @@ -1,15 +1,17 @@ import axios from "axios"; -import { logger } from "../../src/lib/logger"; +import { logger } from "../lib/logger"; import { supabase_service } from "./supabase"; import { WebhookEventType } from "../types"; import { configDotenv } from "dotenv"; +import { z } from "zod"; +import { webhookSchema } from "../controllers/v1/types"; configDotenv(); 
export const callWebhook = async ( teamId: string, id: string, data: any | null, - specified?: string, + specified?: z.infer<typeof webhookSchema>, v1 = false, eventType: WebhookEventType = "crawl.page", awaitWebhook: boolean = false @@ -20,7 +22,7 @@ export const callWebhook = async ( id ); const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true"; - let webhookUrl = specified ?? selfHostedUrl; + let webhookUrl = specified ?? (selfHostedUrl ? webhookSchema.parse({ url: selfHostedUrl }) : undefined); // Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set // and the USE_DB_AUTHENTICATION environment variable is set to true @@ -73,7 +75,7 @@ export const callWebhook = async ( if (awaitWebhook) { try { await axios.post( - webhookUrl, + webhookUrl.url, { success: !v1 ? data.success @@ -92,6 +94,7 @@ export const callWebhook = async ( { headers: { "Content-Type": "application/json", + ...webhookUrl.headers, }, timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1) } @@ -104,7 +107,7 @@ export const callWebhook = async ( } else { axios .post( - webhookUrl, + webhookUrl.url, { success: !v1 ? data.success @@ -123,6 +126,7 @@ export const callWebhook = async ( { headers: { "Content-Type": "application/json", + ...webhookUrl.headers, }, timeout: v1 ? 
10000 : 30000, // 10 seconds timeout (v1) } diff --git a/apps/api/src/types.ts b/apps/api/src/types.ts index 2da97bd1..d7821407 100644 --- a/apps/api/src/types.ts +++ b/apps/api/src/types.ts @@ -1,4 +1,5 @@ -import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document } from "./controllers/v1/types"; +import { z } from "zod"; +import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document, webhookSchema } from "./controllers/v1/types"; import { ExtractorOptions, Document } from "./lib/entities"; import { InternalOptions } from "./scraper/scrapeURL"; @@ -33,7 +34,7 @@ export interface WebScraperOptions { origin?: string; crawl_id?: string; sitemapped?: boolean; - webhook?: string; + webhook?: z.infer<typeof webhookSchema>; v1?: boolean; is_scrape?: boolean; } diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 401b1c20..45e19197 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -153,7 +153,10 @@ export interface CrawlParams { allowExternalLinks?: boolean; ignoreSitemap?: boolean; scrapeOptions?: CrawlScrapeOptions; - webhook?: string | { + webhook?: string | { + url: string; + headers?: Record<string, string>; + }; deduplicateSimilarURLs?: boolean; ignoreQueryParameters?: boolean; }