mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
914 lines
32 KiB
JSON
914 lines
32 KiB
JSON
{
|
|
"openapi": "3.0.0",
|
|
"info": {
|
|
"title": "Firecrawl API",
|
|
"version": "1.0.0",
|
|
"description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
|
|
"contact": {
|
|
"name": "Firecrawl Support",
|
|
"url": "https://firecrawl.dev/support",
|
|
"email": "support@firecrawl.dev"
|
|
}
|
|
},
|
|
"servers": [
|
|
{
|
|
"url": "https://api.firecrawl.dev/v0"
|
|
}
|
|
],
|
|
"paths": {
|
|
"/scrape": {
|
|
"post": {
|
|
"summary": "Scrape a single URL and optionally extract information using an LLM",
|
|
"operationId": "scrapeAndExtractFromUrl",
|
|
"tags": ["Scraping"],
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
],
|
|
"requestBody": {
|
|
"required": true,
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"format": "uri",
|
|
"description": "The URL to scrape"
|
|
},
|
|
"pageOptions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"headers": {
|
|
"type": "object",
|
|
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
|
|
},
|
|
"includeHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
|
|
"default": false
|
|
},
|
|
"includeRawHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
|
|
"default": false
|
|
},
|
|
"onlyIncludeTags": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Only include the listed tags, classes and ids from the page in the final output. Provide each selector as a separate array item. Example: ['script', '.ad', '#footer']"
|
|
},
|
|
"onlyMainContent": {
|
|
"type": "boolean",
|
|
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
|
|
"default": false
|
|
},
|
|
"removeTags": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Tags, classes and ids to remove from the page. Provide each selector as a separate array item. Example: ['script', '.ad', '#footer']"
|
|
},
|
|
"replaceAllPathsWithAbsolutePaths": {
|
|
"type": "boolean",
|
|
"description": "Replace all relative paths with absolute paths for images and links",
|
|
"default": false
|
|
},
|
|
"screenshot": {
|
|
"type": "boolean",
|
|
"description": "Include a screenshot of the top of the page that you are scraping.",
|
|
"default": false
|
|
},
|
|
"waitFor": {
|
|
"type": "integer",
|
|
"description": "Time in milliseconds to wait for the page to load before fetching its content",
|
|
"default": 0
|
|
}
|
|
}
|
|
},
|
|
"extractorOptions": {
|
|
"type": "object",
|
|
"description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
|
|
"default": {},
|
|
"properties": {
|
|
"mode": {
|
|
"type": "string",
|
|
"enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
|
|
"description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
|
|
},
|
|
"extractionPrompt": {
|
|
"type": "string",
|
|
"description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
|
|
},
|
|
"extractionSchema": {
|
|
"type": "object",
|
|
"additionalProperties": true,
|
|
"description": "The schema for the data to be extracted, required only for LLM extraction modes.",
|
|
"required": [
|
|
"company_mission",
|
|
"supports_sso",
|
|
"is_open_source"
|
|
]
|
|
}
|
|
}
|
|
},
|
|
"timeout": {
|
|
"type": "integer",
|
|
"description": "Timeout in milliseconds for the request",
|
|
"default": 30000
|
|
}
|
|
},
|
|
"required": ["url"]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Successful response",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/ScrapeResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"402": {
|
|
"description": "Payment required",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Payment required to access this resource."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Too many requests",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Request rate limit exceeded. Please wait and try again later."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Server error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "An unexpected error occurred on the server."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/crawl": {
|
|
"post": {
|
|
"summary": "Crawl multiple URLs based on options",
|
|
"operationId": "crawlUrls",
|
|
"tags": ["Crawling"],
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
],
|
|
"requestBody": {
|
|
"required": true,
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"url": {
|
|
"type": "string",
|
|
"format": "uri",
|
|
"description": "The base URL to start crawling from"
|
|
},
|
|
"crawlerOptions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"includes": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "URL patterns to include"
|
|
},
|
|
"excludes": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "URL patterns to exclude"
|
|
},
|
|
"generateImgAltText": {
|
|
"type": "boolean",
|
|
"description": "Generate alt text for images using LLMs (must have a paid plan)",
|
|
"default": false
|
|
},
|
|
"returnOnlyUrls": {
|
|
"type": "boolean",
|
|
"description": "If true, returns only the URLs as a list on the crawl status. Note: the response will contain a list of URLs inside the data, not a list of documents.",
|
|
"default": false
|
|
},
|
|
"maxDepth": {
|
|
"type": "integer",
|
|
"description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
|
|
},
|
|
"mode": {
|
|
"type": "string",
|
|
"enum": ["default", "fast"],
|
|
"description": "The crawling mode to use. Fast mode crawls websites without a sitemap up to 4x faster, but may be less accurate and shouldn't be used on heavily JavaScript-rendered websites.",
|
|
"default": "default"
|
|
},
|
|
"ignoreSitemap": {
|
|
"type": "boolean",
|
|
"description": "Ignore the website sitemap when crawling",
|
|
"default": false
|
|
},
|
|
"limit": {
|
|
"type": "integer",
|
|
"description": "Maximum number of pages to crawl",
|
|
"default": 10000
|
|
},
|
|
"allowBackwardCrawling": {
|
|
"type": "boolean",
|
|
"description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
|
|
"default": false
|
|
},
|
|
"allowExternalContentLinks": {
|
|
"type": "boolean",
|
|
"description": "Allows the crawler to follow links to external websites.",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"pageOptions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"headers": {
|
|
"type": "object",
|
|
"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
|
|
},
|
|
"includeHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
|
|
"default": false
|
|
},
|
|
"includeRawHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
|
|
"default": false
|
|
},
|
|
"onlyIncludeTags": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Only include the listed tags, classes and ids from the page in the final output. Provide each selector as a separate array item. Example: ['script', '.ad', '#footer']"
|
|
},
|
|
"onlyMainContent": {
|
|
"type": "boolean",
|
|
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
|
|
"default": false
|
|
},
|
|
"removeTags": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"description": "Tags, classes and ids to remove from the page. Provide each selector as a separate array item. Example: ['script', '.ad', '#footer']"
|
|
},
|
|
"replaceAllPathsWithAbsolutePaths": {
|
|
"type": "boolean",
|
|
"description": "Replace all relative paths with absolute paths for images and links",
|
|
"default": false
|
|
},
|
|
"screenshot": {
|
|
"type": "boolean",
|
|
"description": "Include a screenshot of the top of the page that you are scraping.",
|
|
"default": false
|
|
},
|
|
"waitFor": {
|
|
"type": "integer",
|
|
"description": "Time in milliseconds to wait for the page to load before fetching its content",
|
|
"default": 0
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"required": ["url"]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Successful response",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/CrawlResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"402": {
|
|
"description": "Payment required",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Payment required to access this resource."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Too many requests",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Request rate limit exceeded. Please wait and try again later."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Server error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "An unexpected error occurred on the server."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/search": {
|
|
"post": {
|
|
"summary": "Search for a keyword in Google and return the top page results with markdown content for each page",
|
|
"operationId": "searchGoogle",
|
|
"tags": ["Search"],
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
],
|
|
"requestBody": {
|
|
"required": true,
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"query": {
|
|
"type": "string",
|
|
"description": "The query to search for"
|
|
},
|
|
"pageOptions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"onlyMainContent": {
|
|
"type": "boolean",
|
|
"description": "Only return the main content of the page excluding headers, navs, footers, etc.",
|
|
"default": false
|
|
},
|
|
"fetchPageContent": {
|
|
"type": "boolean",
|
|
"description": "Fetch the content of each page. If false, defaults to a basic, fast SERP API.",
|
|
"default": true
|
|
},
|
|
"includeHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the HTML version of the content on page. Will output a html key in the response.",
|
|
"default": false
|
|
},
|
|
"includeRawHtml": {
|
|
"type": "boolean",
|
|
"description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
|
|
"default": false
|
|
}
|
|
}
|
|
},
|
|
"searchOptions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"limit": {
|
|
"type": "integer",
|
|
"description": "Maximum number of results. Max is 20 during beta."
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"required": ["query"]
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"responses": {
|
|
"200": {
|
|
"description": "Successful response",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"$ref": "#/components/schemas/SearchResponse"
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"402": {
|
|
"description": "Payment required",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Payment required to access this resource."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Too many requests",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Request rate limit exceeded. Please wait and try again later."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Server error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "An unexpected error occurred on the server."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/crawl/status/{jobId}": {
|
|
"get": {
|
|
"tags": ["Crawl"],
|
|
"summary": "Get the status of a crawl job",
|
|
"operationId": "getCrawlStatus",
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
],
|
|
"parameters": [
|
|
{
|
|
"name": "jobId",
|
|
"in": "path",
|
|
"description": "ID of the crawl job",
|
|
"required": true,
|
|
"schema": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
],
|
|
"responses": {
|
|
"200": {
|
|
"description": "Successful response",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"status": {
|
|
"type": "string",
|
|
"description": "Status of the job (completed, active, failed, paused)"
|
|
},
|
|
"current": {
|
|
"type": "integer",
|
|
"description": "Current page number"
|
|
},
|
|
"total": {
|
|
"type": "integer",
|
|
"description": "Total number of pages"
|
|
},
|
|
"data": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/CrawlStatusResponseObj"
|
|
},
|
|
"description": "Data returned from the job (null when it is in progress)"
|
|
},
|
|
"partial_data": {
|
|
"type": "array",
|
|
"items": {
|
|
"$ref": "#/components/schemas/CrawlStatusResponseObj"
|
|
},
|
|
"description": "Partial documents returned while the crawl is in progress (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. The response array holds at most 50 items: once it is full, the oldest item (top of the array) is removed when a new item is added."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"402": {
|
|
"description": "Payment required",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Payment required to access this resource."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Too many requests",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Request rate limit exceeded. Please wait and try again later."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Server error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "An unexpected error occurred on the server."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"/crawl/cancel/{jobId}": {
|
|
"delete": {
|
|
"tags": ["Crawl"],
|
|
"summary": "Cancel a crawl job",
|
|
"operationId": "cancelCrawlJob",
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
],
|
|
"parameters": [
|
|
{
|
|
"name": "jobId",
|
|
"in": "path",
|
|
"description": "ID of the crawl job",
|
|
"required": true,
|
|
"schema": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
],
|
|
"responses": {
|
|
"200": {
|
|
"description": "Successful response",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"status": {
|
|
"type": "string",
|
|
"description": "Returns cancelled."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"402": {
|
|
"description": "Payment required",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Payment required to access this resource."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"429": {
|
|
"description": "Too many requests",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "Request rate limit exceeded. Please wait and try again later."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"500": {
|
|
"description": "Server error",
|
|
"content": {
|
|
"application/json": {
|
|
"schema": {
|
|
"type": "object",
|
|
"properties": {
|
|
"error": {
|
|
"type": "string",
|
|
"example": "An unexpected error occurred on the server."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"components": {
|
|
"securitySchemes": {
|
|
"bearerAuth": {
|
|
"type": "http",
|
|
"scheme": "bearer"
|
|
}
|
|
},
|
|
"schemas": {
|
|
"ScrapeResponse": {
|
|
"type": "object",
|
|
"properties": {
|
|
"success": {
|
|
"type": "boolean"
|
|
},
|
|
"data": {
|
|
"type": "object",
|
|
"properties": {
|
|
"markdown": {
|
|
"type": "string"
|
|
},
|
|
"content": {
|
|
"type": "string"
|
|
},
|
|
"html": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "HTML version of the content on page if `includeHtml` is true"
|
|
},
|
|
"rawHtml": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "Raw HTML content of the page if `includeRawHtml` is true"
|
|
},
|
|
"metadata": {
|
|
"type": "object",
|
|
"properties": {
|
|
"title": {
|
|
"type": "string"
|
|
},
|
|
"description": {
|
|
"type": "string"
|
|
},
|
|
"language": {
|
|
"type": "string",
|
|
"nullable": true
|
|
},
|
|
"sourceURL": {
|
|
"type": "string",
|
|
"format": "uri"
|
|
},
|
|
"<any other metadata> ": {
|
|
"type": "string"
|
|
},
|
|
"pageStatusCode": {
|
|
"type": "integer",
|
|
"description": "The status code of the page"
|
|
},
|
|
"pageError": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "The error message of the page"
|
|
}
|
|
|
|
}
|
|
},
|
|
"llm_extraction": {
|
|
"type": "object",
|
|
"description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
|
|
"nullable": true
|
|
},
|
|
"warning": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "Can be displayed when using LLM Extraction. The warning message will let you know of any issues with the extraction."
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"CrawlStatusResponseObj": {
|
|
"type": "object",
|
|
"properties": {
|
|
"markdown": {
|
|
"type": "string"
|
|
},
|
|
"content": {
|
|
"type": "string"
|
|
},
|
|
"html": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "HTML version of the content on page if `includeHtml` is true"
|
|
},
|
|
"rawHtml": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "Raw HTML content of the page if `includeRawHtml` is true"
|
|
},
|
|
"index": {
|
|
"type": "integer",
|
|
"description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
|
|
},
|
|
"metadata": {
|
|
"type": "object",
|
|
"properties": {
|
|
"title": {
|
|
"type": "string"
|
|
},
|
|
"description": {
|
|
"type": "string"
|
|
},
|
|
"language": {
|
|
"type": "string",
|
|
"nullable": true
|
|
},
|
|
"sourceURL": {
|
|
"type": "string",
|
|
"format": "uri"
|
|
},
|
|
"<any other metadata> ": {
|
|
"type": "string"
|
|
},
|
|
"pageStatusCode": {
|
|
"type": "integer",
|
|
"description": "The status code of the page"
|
|
},
|
|
"pageError": {
|
|
"type": "string",
|
|
"nullable": true,
|
|
"description": "The error message of the page"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"SearchResponse": {
|
|
"type": "object",
|
|
"properties": {
|
|
"success": {
|
|
"type": "boolean"
|
|
},
|
|
"data": {
|
|
"type": "array",
|
|
"items": {
|
|
"type": "object",
|
|
"properties": {
|
|
"url": {
|
|
"type": "string"
|
|
},
|
|
"markdown": {
|
|
"type": "string"
|
|
},
|
|
"content": {
|
|
"type": "string"
|
|
},
|
|
"metadata": {
|
|
"type": "object",
|
|
"properties": {
|
|
"title": {
|
|
"type": "string"
|
|
},
|
|
"description": {
|
|
"type": "string"
|
|
},
|
|
"language": {
|
|
"type": "string",
|
|
"nullable": true
|
|
},
|
|
"sourceURL": {
|
|
"type": "string",
|
|
"format": "uri"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"CrawlResponse": {
|
|
"type": "object",
|
|
"properties": {
|
|
"jobId": {
|
|
"type": "string"
|
|
}
|
|
}
|
|
}
|
|
}
|
|
},
|
|
"security": [
|
|
{
|
|
"bearerAuth": []
|
|
}
|
|
]
|
|
} |