firecrawl/apps/api/openapi.json

{
  "openapi": "3.0.0",
  "info": {
    "title": "Firecrawl API",
    "version": "1.0.0",
    "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
    "contact": {
      "name": "Firecrawl Support",
      "url": "https://firecrawl.dev/support",
      "email": "support@firecrawl.dev"
    }
  },
  "servers": [
    {
      "url": "https://api.firecrawl.dev/v0"
    }
  ],
  "paths": {
    "/scrape": {
      "post": {
        "summary": "Scrape a single URL and optionally extract information using an LLM",
        "operationId": "scrapeAndExtractFromUrl",
        "tags": ["Scraping"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The URL to scrape"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output an html key in the response.",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Number of milliseconds to wait for the page to load before fetching content",
                        "default": 0
                      },
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      }
                    }
                  },
                  "extractorOptions": {
                    "type": "object",
                    "description": "Options for LLM-based extraction of structured information from the page content",
                    "properties": {
                      "mode": {
                        "type": "string",
                        "enum": ["llm-extraction"],
                        "description": "The extraction mode to use. Currently only 'llm-extraction' is supported."
                      },
                      "extractionPrompt": {
                        "type": "string",
                        "description": "A prompt describing what information to extract from the page"
                      },
                      "extractionSchema": {
                        "type": "object",
                        "additionalProperties": true,
                        "description": "The schema for the data to be extracted",
                        "required": [
                          "company_mission",
                          "supports_sso",
                          "is_open_source"
                        ]
                      }
                    }
                  },
                  "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds for the request",
                    "default": 30000
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required"
          },
          "429": {
            "description": "Too many requests"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/crawl": {
      "post": {
        "summary": "Crawl multiple URLs based on options",
        "operationId": "crawlUrls",
        "tags": ["Crawling"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The base URL to start crawling from"
                  },
                  "crawlerOptions": {
                    "type": "object",
                    "properties": {
                      "includes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to include"
                      },
                      "excludes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to exclude"
                      },
                      "generateImgAltText": {
                        "type": "boolean",
                        "description": "Generate alt text for images using LLMs (must have a paid plan)",
                        "default": false
                      },
                      "returnOnlyUrls": {
                        "type": "boolean",
                        "description": "If true, returns only the URLs as a list in the crawl status. Attention: the response will be a list of URLs inside the data, not a list of documents.",
                        "default": false
                      },
                      "maxDepth": {
                        "type": "integer",
                        "description": "Maximum depth to crawl. Depth 1 is the base URL, depth 2 is the base URL and its direct children, and so on."
                      },
                      "mode": {
                        "type": "string",
                        "enum": ["default", "fast"],
                        "description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily JS-rendered websites.",
                        "default": "default"
                      },
                      "ignoreSitemap": {
                        "type": "boolean",
                        "description": "Ignore the website sitemap when crawling",
                        "default": false
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of pages to crawl",
                        "default": 10000
                      }
                    }
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output an html key in the response.",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request when scraping. Can be used to send cookies, user-agent, etc."
                      }
                    }
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required"
          },
          "429": {
            "description": "Too many requests"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/search": {
      "post": {
        "summary": "Search for a keyword in Google and return the top page results with markdown content for each page",
        "operationId": "searchGoogle",
        "tags": ["Search"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "query": {
                    "type": "string",
                    "format": "uri",
                    "description": "The query to search for"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "fetchPageContent": {
                        "type": "boolean",
                        "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
                        "default": true
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output an html key in the response.",
                        "default": false
                      }
                    }
                  },
                  "searchOptions": {
                    "type": "object",
                    "properties": {
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of results. Max is 20 during beta."
                      }
                    }
                  }
                },
                "required": ["query"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/SearchResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required"
          },
          "429": {
            "description": "Too many requests"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/crawl/status/{jobId}": {
      "get": {
        "tags": ["Crawl"],
        "summary": "Get the status of a crawl job",
        "operationId": "getCrawlStatus",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Status of the job (completed, active, failed, paused)"
                    },
                    "current": {
                      "type": "integer",
                      "description": "Current page number"
                    },
                    "current_url": {
                      "type": "string",
                      "description": "Current URL being scraped"
                    },
                    "current_step": {
                      "type": "string",
                      "description": "Current step in the process"
                    },
                    "total": {
                      "type": "integer",
                      "description": "Total number of pages"
                    },
                    "data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Data returned from the job (null when it is in progress)"
                    },
                    "partial_data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Partial documents returned while the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. There is a max of 50 items in the array response. When a new item is added to the array, the oldest item (top of the array) is removed."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required"
          },
          "429": {
            "description": "Too many requests"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    },
    "/crawl/cancel/{jobId}": {
      "delete": {
        "tags": ["Crawl"],
        "summary": "Cancel a crawl job",
        "operationId": "cancelCrawlJob",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Returns cancelled."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required"
          },
          "429": {
            "description": "Too many requests"
          },
          "500": {
            "description": "Server error"
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer"
      }
    },
    "schemas": {
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "object",
            "properties": {
              "markdown": {
                "type": "string"
              },
              "content": {
                "type": "string"
              },
              "html": {
                "type": "string",
                "nullable": true,
                "description": "Raw HTML content of the page if `includeHtml` is true"
              },
              "metadata": {
                "type": "object",
                "properties": {
                  "title": {
                    "type": "string"
                  },
                  "description": {
                    "type": "string"
                  },
                  "language": {
                    "type": "string",
                    "nullable": true
                  },
                  "sourceURL": {
                    "type": "string",
                    "format": "uri"
                  }
                }
              },
              "llm_extraction": {
                "type": "object",
                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
                "nullable": true
              },
              "warning": {
                "type": "string",
                "nullable": true,
                "description": "Can be displayed when using LLM Extraction. The warning message will let you know of any issues with the extraction."
              }
            }
          }
        }
      },
      "CrawlStatusResponseObj": {
        "type": "object",
        "properties": {
          "markdown": {
            "type": "string"
          },
          "content": {
            "type": "string"
          },
          "html": {
            "type": "string",
            "nullable": true,
            "description": "Raw HTML content of the page if `includeHtml` is true"
          },
          "index": {
            "type": "integer",
            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from." 
          },
          "metadata": {
            "type": "object",
            "properties": {
              "title": {
                "type": "string"
              },
              "description": {
                "type": "string"
              },
              "language": {
                "type": "string",
                "nullable": true
              },
              "sourceURL": {
                "type": "string",
                "format": "uri"
              }
            }
          }
        }
      },
      "SearchResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
                "markdown": {
                  "type": "string"
                },
                "content": {
                  "type": "string"
                },
                "metadata": {
                  "type": "object",
                  "properties": {
                    "title": {
                      "type": "string"
                    },
                    "description": {
                      "type": "string"
                    },
                    "language": {
                      "type": "string",
                      "nullable": true
                    },
                    "sourceURL": {
                      "type": "string",
                      "format": "uri"
                    }
                  }
                }
              }
            }
          }
        }
      },
      "CrawlResponse": {
        "type": "object",
        "properties": {
          "jobId": {
            "type": "string"
          }
        }
      }
    }
  },
  "security": [
    {
      "bearerAuth": []
    }
  ]
}
Create openapi.json 2024-04-18 13:23:10 +08:00			`{`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"openapi": "3.0.0",`
			`"info": {`
			`"title": "Firecrawl API",`
			`"version": "1.0.0",`
			`"description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",`
			`"contact": {`
			`"name": "Firecrawl Support",`
			`"url": "https://firecrawl.dev/support",`
			`"email": "support@firecrawl.dev"`
			`}`
			`},`
			`"servers": [`
			`{`
			`"url": "https://api.firecrawl.dev/v0"`
			`}`
			`],`
			`"paths": {`
			`"/scrape": {`
			`"post": {`
Nick: 2024-05-16 03:11:16 +08:00			`"summary": "Scrape a single URL and optionally extract information using an LLM",`
			`"operationId": "scrapeAndExtractFromUrl",`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"tags": ["Scraping"],`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`],`
			`"requestBody": {`
			`"required": true,`
			`"content": {`
			`"application/json": {`
			`"schema": {`
			`"type": "object",`
			`"properties": {`
			`"url": {`
			`"type": "string",`
			`"format": "uri",`
			`"description": "The URL to scrape"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"pageOptions": {`
			`"type": "object",`
			`"properties": {`
			`"onlyMainContent": {`
			`"type": "boolean",`
			`"description": "Only return the main content of the page excluding headers, navs, footers, etc.",`
			`"default": false`
Nick: 2024-05-16 03:11:16 +08:00			`},`
			`"includeHtml": {`
			`"type": "boolean",`
			`"description": "Include the raw HTML content of the page. Will output a html key in the response.",`
			`"default": false`
Nick: 2024-05-29 03:56:24 +08:00			`},`
Update openapi.json 2024-06-11 09:26:25 +08:00			`"screenshot": {`
			`"type": "boolean",`
			`"description": "Include a screenshot of the top of the page that you are scraping.",`
			`"default": false`
			`},`
Nick: 2024-05-29 03:56:24 +08:00			`"waitFor": {`
			`"type": "integer",`
			`"description": "Wait x amount of milliseconds for the page to load to fetch content",`
			`"default": 0`
Update openapi.json 2024-06-11 09:26:25 +08:00			`},`
			`"headers": {`
			`"type": "object",`
			`"description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."`
Nick: 2024-05-16 03:11:16 +08:00			`}`
			`}`
			`},`
			`"extractorOptions": {`
			`"type": "object",`
			`"description": "Options for LLM-based extraction of structured information from the page content",`
			`"properties": {`
			`"mode": {`
			`"type": "string",`
			`"enum": ["llm-extraction"],`
			`"description": "The extraction mode to use, currently supports 'llm-extraction'"`
			`},`
			`"extractionPrompt": {`
			`"type": "string",`
			`"description": "A prompt describing what information to extract from the page"`
			`},`
			`"extractionSchema": {`
			`"type": "object",`
			`"additionalProperties": true,`
			`"description": "The schema for the data to be extracted",`
			`"required": [`
			`"company_mission",`
			`"supports_sso",`
			`"is_open_source"`
			`]`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`}`
Nick: 2024-05-16 03:11:16 +08:00			`},`
			`"timeout": {`
			`"type": "integer",`
			`"description": "Timeout in milliseconds for the request",`
			`"default": 30000`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"required": ["url"]`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"responses": {`
			`"200": {`
			`"description": "Successful response",`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"content": {`
			`"application/json": {`
			`"schema": {`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"$ref": "#/components/schemas/ScrapeResponse"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"402": {`
			`"description": "Payment required"`
			`},`
			`"429": {`
			`"description": "Too many requests"`
			`},`
			`"500": {`
			`"description": "Server error"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`},`
			`"/crawl": {`
			`"post": {`
			`"summary": "Crawl multiple URLs based on options",`
			`"operationId": "crawlUrls",`
			`"tags": ["Crawling"],`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`],`
			`"requestBody": {`
			`"required": true,`
			`"content": {`
			`"application/json": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"schema": {`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"type": "object",`
			`"properties": {`
			`"url": {`
			`"type": "string",`
			`"format": "uri",`
			`"description": "The base URL to start crawling from"`
			`},`
			`"crawlerOptions": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"type": "object",`
			`"properties": {`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"includes": {`
			`"type": "array",`
			`"items": {`
			`"type": "string"`
			`},`
			`"description": "URL patterns to include"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"excludes": {`
			`"type": "array",`
			`"items": {`
			`"type": "string"`
			`},`
			`"description": "URL patterns to exclude"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"generateImgAltText": {`
			`"type": "boolean",`
			`"description": "Generate alt text for images using LLMs (must have a paid plan)",`
			`"default": false`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"returnOnlyUrls": {`
			`"type": "boolean",`
			`"description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.",`
			`"default": false`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Nick: 2024-05-16 03:11:16 +08:00			`"maxDepth": {`
			`"type": "integer",`
			`"description": "Maximum depth to crawl. Depth 1 is the base URL, depth 2 is the base URL and its direct children, and so on."`
			`},`
			`"mode": {`
			`"type": "string",`
			`"enum": ["default", "fast"],`
			`"description": "The crawling mode to use. Fast mode crawls 4x faster websites without sitemap, but may not be as accurate and shouldn't be used in heavy js-rendered websites.",`
			`"default": "default"`
			`},`
Update openapi.json 2024-06-11 09:26:25 +08:00			`"ignoreSitemap": {`
			`"type": "boolean",`
			`"description": "Ignore the website sitemap when crawling",`
			`"default": false`
			`},`
Update openapi.json 2024-06-12 03:08:49 +08:00			`"replaceAllPathsWithAbsolutePaths": {`
			`"type": "boolean",`
			`"description": "Replace all relative paths with absolute paths for images and links",`
			`"default": false`
			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"limit": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"type": "integer",`
Added default value for crawlOptions.limit 2024-05-10 22:59:33 +08:00			`"description": "Maximum number of pages to crawl",`
			`"default": 10000`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`}`
			`},`
			`"pageOptions": {`
			`"type": "object",`
			`"properties": {`
			`"onlyMainContent": {`
			`"type": "boolean",`
			`"description": "Only return the main content of the page excluding headers, navs, footers, etc.",`
			`"default": false`
Nick: 2024-05-16 03:11:16 +08:00			`},`
			`"includeHtml": {`
			`"type": "boolean",`
			`"description": "Include the raw HTML content of the page. Will output a html key in the response.",`
			`"default": false`
Update openapi.json 2024-06-11 09:26:25 +08:00			`},`
			`"screenshot": {`
			`"type": "boolean",`
			`"description": "Include a screenshot of the top of the page that you are scraping.",`
			`"default": false`
			`},`
			`"headers": {`
			`"type": "object",`
			`"description": "Headers to send with the request when scraping. Can be used to send cookies, user-agent, etc."`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"required": ["url"]`
			`}`
			`}`
			`}`
			`},`
			`"responses": {`
			`"200": {`
			`"description": "Successful response",`
			`"content": {`
			`"application/json": {`
			`"schema": {`
			`"$ref": "#/components/schemas/CrawlResponse"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"402": {`
			`"description": "Payment required"`
			`},`
			`"429": {`
			`"description": "Too many requests"`
			`},`
			`"500": {`
			`"description": "Server error"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
			`},`
Update openapi.json 2024-04-25 01:11:44 +08:00			`"/search": {`
			`"post": {`
			`"summary": "Search for a keyword in Google, returns top page results with markdown content for each page",`
			`"operationId": "searchGoogle",`
			`"tags": ["Search"],`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`],`
			`"requestBody": {`
			`"required": true,`
			`"content": {`
			`"application/json": {`
			`"schema": {`
			`"type": "object",`
			`"properties": {`
			`"query": {`
			`"type": "string",`
			`"format": "uri",`
Update openapi.json 2024-05-17 02:03:32 +08:00			`"description": "The query to search for"`
Update openapi.json 2024-04-25 01:11:44 +08:00			`},`
			`"pageOptions": {`
			`"type": "object",`
			`"properties": {`
			`"onlyMainContent": {`
			`"type": "boolean",`
			`"description": "Only return the main content of the page excluding headers, navs, footers, etc.",`
			`"default": false`
			`},`
			`"fetchPageContent": {`
			`"type": "boolean",`
			`"description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",`
			`"default": true`
Nick: 2024-05-16 03:11:16 +08:00			`},`
			`"includeHtml": {`
			`"type": "boolean",`
			`"description": "Include the raw HTML content of the page. Will output a html key in the response.",`
			`"default": false`
Update openapi.json 2024-04-25 01:11:44 +08:00			`}`
			`}`
			`},`
			`"searchOptions": {`
			`"type": "object",`
			`"properties": {`
			`"limit": {`
			`"type": "integer",`
			`"description": "Maximum number of results. Max is 20 during beta."`
			`}`
			`}`
			`}`
			`},`
			`"required": ["query"]`
			`}`
			`}`
			`}`
			`},`
			`"responses": {`
			`"200": {`
			`"description": "Successful response",`
			`"content": {`
			`"application/json": {`
			`"schema": {`
			`"$ref": "#/components/schemas/SearchResponse"`
			`}`
			`}`
			`}`
			`},`
			`"402": {`
			`"description": "Payment required"`
			`},`
			`"429": {`
			`"description": "Too many requests"`
			`},`
			`"500": {`
			`"description": "Server error"`
			`}`
			`}`
			`}`
			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"/crawl/status/{jobId}": {`
			`"get": {`
			`"tags": ["Crawl"],`
			`"summary": "Get the status of a crawl job",`
			`"operationId": "getCrawlStatus",`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`],`
			`"parameters": [`
			`{`
			`"name": "jobId",`
			`"in": "path",`
			`"description": "ID of the crawl job",`
			`"required": true,`
			`"schema": {`
			`"type": "string"`
			`}`
			`}`
			`],`
			`"responses": {`
			`"200": {`
			`"description": "Successful response",`
			`"content": {`
			`"application/json": {`
			`"schema": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"type": "object",`
			`"properties": {`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"status": {`
			`"type": "string",`
			`"description": "Status of the job (completed, active, failed, paused)"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"current": {`
			`"type": "integer",`
			`"description": "Current page number"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"current_url": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"type": "string",`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"description": "Current URL being scraped"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"current_step": {`
Create openapi.json 2024-04-18 13:23:10 +08:00			`"type": "string",`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"description": "Current step in the process"`
			`},`
			`"total": {`
			`"type": "integer",`
			`"description": "Total number of pages"`
			`},`
			`"data": {`
			`"type": "array",`
			`"items": {`
Update openapi.json 2024-05-17 02:03:32 +08:00			`"$ref": "#/components/schemas/CrawlStatusResponseObj"`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"description": "Data returned from the job (null when it is in progress)"`
Nick: 2024-05-16 03:11:16 +08:00			`},`
			`"partial_data": {`
			`"type": "array",`
			`"items": {`
Update openapi.json 2024-05-17 02:03:32 +08:00			`"$ref": "#/components/schemas/CrawlStatusResponseObj"`
Nick: 2024-05-16 03:11:16 +08:00			`},`
Update openapi.json 2024-06-11 09:26:25 +08:00			`"description": "Partial documents returned as it is being crawled (streaming). This feature is currently in alpha - expect breaking changes. When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. The array response holds a maximum of 50 items; when a new item is added, the oldest item (top of the array) is removed."`
Nick: 2024-05-16 03:11:16 +08:00			`}`
			`}`
			`}`
			`}`
			`}`
			`},`
			`"402": {`
			`"description": "Payment required"`
			`},`
			`"429": {`
			`"description": "Too many requests"`
			`},`
			`"500": {`
			`"description": "Server error"`
			`}`
			`}`
			`}`
			`},`
			`"/crawl/cancel/{jobId}": {`
			`"delete": {`
			`"tags": ["Crawl"],`
			`"summary": "Cancel a crawl job",`
			`"operationId": "cancelCrawlJob",`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`],`
			`"parameters": [`
			`{`
			`"name": "jobId",`
			`"in": "path",`
			`"description": "ID of the crawl job",`
			`"required": true,`
			`"schema": {`
			`"type": "string"`
			`}`
			`}`
			`],`
			`"responses": {`
			`"200": {`
			`"description": "Successful response",`
			`"content": {`
			`"application/json": {`
			`"schema": {`
			`"type": "object",`
			`"properties": {`
			`"status": {`
			`"type": "string",`
			`"description": "Returns cancelled."`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`},`
			`"402": {`
			`"description": "Payment required"`
			`},`
			`"429": {`
			`"description": "Too many requests"`
			`},`
			`"500": {`
			`"description": "Server error"`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
			`}`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`},`
			`"components": {`
			`"securitySchemes": {`
			`"bearerAuth": {`
			`"type": "http",`
			`"scheme": "bearer"`
			`}`
Create openapi.json 2024-04-18 13:23:10 +08:00			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"schemas": {`
			`"ScrapeResponse": {`
			`"type": "object",`
			`"properties": {`
			`"success": {`
			`"type": "boolean"`
			`},`
			`"data": {`
			`"type": "object",`
			`"properties": {`
Update openapi.json 2024-04-25 01:11:44 +08:00			`"markdown": {`
			`"type": "string"`
			`},`
Update openapi.json 2024-04-22 23:41:54 +08:00			`"content": {`
			`"type": "string"`
			`},`
Nick: 2024-05-16 03:11:16 +08:00			`"html": {`
			`"type": "string",`
			`"nullable": true,`
			"description": "Raw HTML content of the page if `includeHtml` is true"
			`},`
Update openapi.json 2024-04-25 01:11:44 +08:00			`"metadata": {`
			`"type": "object",`
			`"properties": {`
			`"title": {`
			`"type": "string"`
			`},`
			`"description": {`
			`"type": "string"`
			`},`
			`"language": {`
			`"type": "string",`
			`"nullable": true`
			`},`
			`"sourceURL": {`
			`"type": "string",`
			`"format": "uri"`
			`}`
			`}`
Update openapi.json 2024-05-21 08:10:55 +08:00			`},`
			`"llm_extraction": {`
			`"type": "object",`
			`"description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",`
			`"nullable": true`
			`},`
			`"warning": {`
			`"type": "string",`
			`"nullable": true,`
			`"description": "Can be displayed when using LLM Extraction. The warning message will let you know of any issues with the extraction."`
Update openapi.json 2024-04-25 01:11:44 +08:00			`}`
			`}`
			`}`
			`}`
			`},`
Update openapi.json 2024-05-17 02:03:32 +08:00			`"CrawlStatusResponseObj": {`
			`"type": "object",`
			`"properties": {`
			`"markdown": {`
			`"type": "string"`
			`},`
			`"content": {`
			`"type": "string"`
			`},`
			`"html": {`
			`"type": "string",`
			`"nullable": true,`
			"description": "Raw HTML content of the page if `includeHtml` is true"
			`},`
Update openapi.json 2024-06-11 09:26:25 +08:00			`"index": {`
			`"type": "integer",`
			"description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
			`},`
Update openapi.json 2024-05-17 02:03:32 +08:00			`"metadata": {`
			`"type": "object",`
			`"properties": {`
			`"title": {`
			`"type": "string"`
			`},`
			`"description": {`
			`"type": "string"`
			`},`
			`"language": {`
			`"type": "string",`
			`"nullable": true`
			`},`
			`"sourceURL": {`
			`"type": "string",`
			`"format": "uri"`
			`}`
			`}`
			`}`
			`}`
			`},`
Update openapi.json 2024-04-25 01:11:44 +08:00			`"SearchResponse": {`
			`"type": "object",`
			`"properties": {`
			`"success": {`
			`"type": "boolean"`
			`},`
			`"data": {`
Update openapi.json 2024-04-26 04:28:07 +08:00			`"type": "array",`
			`"items": {`
			`"type": "object",`
			`"properties": {`
			`"url": {`
			`"type": "string"`
			`},`
			`"markdown": {`
			`"type": "string"`
			`},`
			`"content": {`
			`"type": "string"`
			`},`
			`"metadata": {`
			`"type": "object",`
			`"properties": {`
			`"title": {`
			`"type": "string"`
			`},`
			`"description": {`
			`"type": "string"`
			`},`
			`"language": {`
			`"type": "string",`
			`"nullable": true`
			`},`
			`"sourceURL": {`
			`"type": "string",`
			`"format": "uri"`
			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`}`
			`}`
			`}`
			`}`
			`}`
			`},`
			`"CrawlResponse": {`
			`"type": "object",`
			`"properties": {`
			`"jobId": {`
			`"type": "string"`
			`}`
			`}`
Create openapi.json 2024-04-18 13:23:10 +08:00			`}`
Update openapi.json 2024-04-22 23:41:54 +08:00			`}`
			`},`
			`"security": [`
			`{`
			`"bearerAuth": []`
			`}`
			`]`
			`}`