feat(v1/webhook): complex webhook object w/ headers (#899)

* feat(v1/webhook): complex webhook object w/ headers

* feat(js-sdk/crawl): add complex webhook support
This commit is contained in:
Gergő Móricz 2024-11-13 19:36:44 +01:00 committed by GitHub
parent ea1302960f
commit 32be2cf786
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 28 additions and 9 deletions

View File

@ -220,11 +220,22 @@ const crawlerOptions = z.object({
export type CrawlerOptions = z.infer<typeof crawlerOptions>; export type CrawlerOptions = z.infer<typeof crawlerOptions>;
// Schema for the crawl-request webhook option. Accepts either a bare URL
// string (normalized into `{ url }`) or an object carrying the URL plus an
// optional map of extra HTTP headers to attach to webhook deliveries.
export const webhookSchema = z.preprocess(
  x => (typeof x === "string" ? { url: x } : x),
  z
    .object({
      url: z.string().url(),
      // Custom headers sent with every webhook request; empty by default.
      headers: z.record(z.string(), z.string()).default({}),
    })
    .strict(strictMessage),
);
export const crawlRequestSchema = crawlerOptions.extend({ export const crawlRequestSchema = crawlerOptions.extend({
url, url,
origin: z.string().optional().default("api"), origin: z.string().optional().default("api"),
scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}), scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
webhook: z.string().url().optional(), webhook: webhookSchema.optional(),
limit: z.number().default(10000), limit: z.number().default(10000),
}).strict(strictMessage); }).strict(strictMessage);

View File

@ -1,15 +1,17 @@
import axios from "axios"; import axios from "axios";
import { logger } from "../../src/lib/logger"; import { logger } from "../lib/logger";
import { supabase_service } from "./supabase"; import { supabase_service } from "./supabase";
import { WebhookEventType } from "../types"; import { WebhookEventType } from "../types";
import { configDotenv } from "dotenv"; import { configDotenv } from "dotenv";
import { z } from "zod";
import { webhookSchema } from "../controllers/v1/types";
configDotenv(); configDotenv();
export const callWebhook = async ( export const callWebhook = async (
teamId: string, teamId: string,
id: string, id: string,
data: any | null, data: any | null,
specified?: string, specified?: z.infer<typeof webhookSchema>,
v1 = false, v1 = false,
eventType: WebhookEventType = "crawl.page", eventType: WebhookEventType = "crawl.page",
awaitWebhook: boolean = false awaitWebhook: boolean = false
@ -20,7 +22,7 @@ export const callWebhook = async (
id id
); );
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true"; const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
let webhookUrl = specified ?? selfHostedUrl; let webhookUrl = specified ?? (selfHostedUrl ? webhookSchema.parse({ url: selfHostedUrl }) : undefined);
// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set // Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
// and the USE_DB_AUTHENTICATION environment variable is set to true // and the USE_DB_AUTHENTICATION environment variable is set to true
@ -73,7 +75,7 @@ export const callWebhook = async (
if (awaitWebhook) { if (awaitWebhook) {
try { try {
await axios.post( await axios.post(
webhookUrl, webhookUrl.url,
{ {
success: !v1 success: !v1
? data.success ? data.success
@ -92,6 +94,7 @@ export const callWebhook = async (
{ {
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
...webhookUrl.headers,
}, },
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1) timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
} }
@ -104,7 +107,7 @@ export const callWebhook = async (
} else { } else {
axios axios
.post( .post(
webhookUrl, webhookUrl.url,
{ {
success: !v1 success: !v1
? data.success ? data.success
@ -123,6 +126,7 @@ export const callWebhook = async (
{ {
headers: { headers: {
"Content-Type": "application/json", "Content-Type": "application/json",
...webhookUrl.headers,
}, },
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1) timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
} }

View File

@ -1,4 +1,5 @@
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document } from "./controllers/v1/types"; import { z } from "zod";
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document, webhookSchema } from "./controllers/v1/types";
import { ExtractorOptions, Document } from "./lib/entities"; import { ExtractorOptions, Document } from "./lib/entities";
import { InternalOptions } from "./scraper/scrapeURL"; import { InternalOptions } from "./scraper/scrapeURL";
@ -33,7 +34,7 @@ export interface WebScraperOptions {
origin?: string; origin?: string;
crawl_id?: string; crawl_id?: string;
sitemapped?: boolean; sitemapped?: boolean;
webhook?: string; webhook?: z.infer<typeof webhookSchema>;
v1?: boolean; v1?: boolean;
is_scrape?: boolean; is_scrape?: boolean;
} }

View File

@ -153,7 +153,10 @@ export interface CrawlParams {
allowExternalLinks?: boolean; allowExternalLinks?: boolean;
ignoreSitemap?: boolean; ignoreSitemap?: boolean;
scrapeOptions?: CrawlScrapeOptions; scrapeOptions?: CrawlScrapeOptions;
webhook?: string; webhook?: string | {
url: string;
headers?: Record<string, string>;
};
deduplicateSimilarURLs?: boolean; deduplicateSimilarURLs?: boolean;
ignoreQueryParameters?: boolean; ignoreQueryParameters?: boolean;
} }