feat(v1/webhook): complex webhook object w/ headers (#899)

* feat(v1/webhook): complex webhook object w/ headers

* feat(js-sdk/crawl): add complex webhook support
This commit is contained in:
Gergő Móricz 2024-11-13 19:36:44 +01:00 committed by GitHub
parent ea1302960f
commit 32be2cf786
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 28 additions and 9 deletions

View File

@ -220,11 +220,22 @@ const crawlerOptions = z.object({
/** Static TypeScript type inferred from the `crawlerOptions` zod schema. */
export type CrawlerOptions = z.infer<typeof crawlerOptions>;
/**
 * Schema for a webhook target.
 *
 * Input may be either a plain string or an object. A string is wrapped as
 * `{ url: <string> }` before validation; anything else is validated as-is.
 * The validated object must have a `url` that is a valid URL and may carry
 * string-to-string `headers`, which default to `{}` when omitted.
 * The object schema is strict, using `strictMessage` as its message.
 */
export const webhookSchema = z.preprocess(
  (input) => (typeof input === "string" ? { url: input } : input),
  z
    .object({
      url: z.string().url(),
      headers: z.record(z.string(), z.string()).default({}),
    })
    .strict(strictMessage)
);
/**
 * Schema for a crawl request: everything in `crawlerOptions` plus the fields
 * declared here. The resulting object schema is strict, using `strictMessage`
 * as its message.
 *
 * - `origin` defaults to `"api"`.
 * - `scrapeOptions` is the shared scrape options schema without `timeout`,
 *   defaulting to `{}`.
 * - `webhook` is optional and validated by `webhookSchema`.
 * - `limit` defaults to `10000`.
 */
export const crawlRequestSchema = crawlerOptions.extend({
  url,
  origin: z.string().optional().default("api"),
  scrapeOptions: scrapeOptions.omit({ timeout: true }).default({}),
  // Declared exactly once: a duplicate `webhook` key is a TypeScript error,
  // and at runtime the later entry would silently override the earlier one.
  webhook: webhookSchema.optional(),
  limit: z.number().default(10000),
}).strict(strictMessage);

View File

@ -1,15 +1,17 @@
import axios from "axios";
import { logger } from "../../src/lib/logger";
import { logger } from "../lib/logger";
import { supabase_service } from "./supabase";
import { WebhookEventType } from "../types";
import { configDotenv } from "dotenv";
import { z } from "zod";
import { webhookSchema } from "../controllers/v1/types";
configDotenv();
export const callWebhook = async (
teamId: string,
id: string,
data: any | null,
specified?: string,
specified?: z.infer<typeof webhookSchema>,
v1 = false,
eventType: WebhookEventType = "crawl.page",
awaitWebhook: boolean = false
@ -20,7 +22,7 @@ export const callWebhook = async (
id
);
const useDbAuthentication = process.env.USE_DB_AUTHENTICATION === "true";
let webhookUrl = specified ?? selfHostedUrl;
let webhookUrl = specified ?? (selfHostedUrl ? webhookSchema.parse({ url: selfHostedUrl }) : undefined);
// Only fetch the webhook URL from the database if the self-hosted webhook URL and specified webhook are not set
// and the USE_DB_AUTHENTICATION environment variable is set to true
@ -73,7 +75,7 @@ export const callWebhook = async (
if (awaitWebhook) {
try {
await axios.post(
webhookUrl,
webhookUrl.url,
{
success: !v1
? data.success
@ -92,6 +94,7 @@ export const callWebhook = async (
{
headers: {
"Content-Type": "application/json",
...webhookUrl.headers,
},
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
}
@ -104,7 +107,7 @@ export const callWebhook = async (
} else {
axios
.post(
webhookUrl,
webhookUrl.url,
{
success: !v1
? data.success
@ -123,6 +126,7 @@ export const callWebhook = async (
{
headers: {
"Content-Type": "application/json",
...webhookUrl.headers,
},
timeout: v1 ? 10000 : 30000, // 10 seconds timeout (v1)
}

View File

@ -1,4 +1,5 @@
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document } from "./controllers/v1/types";
import { z } from "zod";
import { AuthCreditUsageChunk, ScrapeOptions, Document as V1Document, webhookSchema } from "./controllers/v1/types";
import { ExtractorOptions, Document } from "./lib/entities";
import { InternalOptions } from "./scraper/scrapeURL";
@ -33,7 +34,7 @@ export interface WebScraperOptions {
origin?: string;
crawl_id?: string;
sitemapped?: boolean;
webhook?: string;
webhook?: z.infer<typeof webhookSchema>;
v1?: boolean;
is_scrape?: boolean;
}

View File

@ -153,7 +153,10 @@ export interface CrawlParams {
allowExternalLinks?: boolean;
ignoreSitemap?: boolean;
scrapeOptions?: CrawlScrapeOptions;
webhook?: string;
webhook?: string | {
url: string;
headers?: Record<string, string>;
};
deduplicateSimilarURLs?: boolean;
ignoreQueryParameters?: boolean;
}