{ "openapi": "3.0.0", "info": { "title": "Firecrawl API", "version": "1.0.0", "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.", "contact": { "name": "Firecrawl Support", "url": "https://firecrawl.dev/support", "email": "support@firecrawl.dev" } }, "servers": [ { "url": "https://api.firecrawl.dev/v0" } ], "paths": { "/scrape": { "post": { "summary": "Scrape a single URL", "operationId": "scrape", "tags": ["Scraping"], "security": [ { "bearerAuth": [] } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "url": { "type": "string", "format": "uri", "description": "The URL to scrape" }, "formats": { "type": "array", "items": { "type": "string", "enum": ["markdown", "html", "rawHtml", "links", "screenshot", "screenshot@fullPage"] }, "description": "Specific formats to return.\n\n - markdown: The page in Markdown format.\n - html: The page's HTML, trimmed to include only meaningful content.\n - rawHtml: The page's original HTML.\n - links: The links on the page.\n - screenshot: A screenshot of the top of the page.\n - screenshot@fullPage: A screenshot of the full page. (overridden by screenshot if present)", "default": ["markdown"] }, "headers": { "type": "object", "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." }, "includeTags": { "type": "array", "items": { "type": "string" }, "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" }, "excludeTags": { "type": "array", "items": { "type": "string" }, "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'" }, "onlyMainContent": { "type": "boolean", "description": "Only return the main content of the page excluding headers, navs, footers, etc.", "default": true }, "timeout": { "type": "integer", "description": "Timeout in milliseconds for the request", "default": 30000 }, "waitFor": { "type": "integer", "description": "Wait x amount of milliseconds for the page to load to fetch content", "default": 0 } }, "required": ["url"] } } } }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/ScrapeResponse" } } } }, "402": { "description": "Payment required", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Payment required to access this resource." } } } } } }, "429": { "description": "Too many requests", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Request rate limit exceeded. Please wait and try again later." } } } } } }, "500": { "description": "Server error", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "An unexpected error occurred on the server."
} } } } } } } } }, "/crawl": { "post": { "summary": "Crawl multiple URLs based on options", "operationId": "crawlUrls", "tags": ["Crawl"], "security": [ { "bearerAuth": [] } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "url": { "type": "string", "format": "uri", "description": "The base URL to start crawling from" }, "crawlerOptions": { "type": "object", "properties": { "includes": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to include" }, "excludes": { "type": "array", "items": { "type": "string" }, "description": "URL patterns to exclude" }, "generateImgAltText": { "type": "boolean", "description": "Generate alt text for images using LLMs (must have a paid plan)", "default": false }, "returnOnlyUrls": { "type": "boolean", "description": "If true, returns only the URLs as a list on the crawl status. Attention: the return response will be a list of URLs inside the data, not a list of documents.", "default": false }, "maxDepth": { "type": "integer", "description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern." }, "mode": { "type": "string", "enum": ["default", "fast"], "description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily JS-rendered websites.", "default": "default" }, "ignoreSitemap": { "type": "boolean", "description": "Ignore the website sitemap when crawling", "default": false }, "limit": { "type": "integer", "description": "Maximum number of pages to crawl", "default": 10000 }, "allowBackwardCrawling": { "type": "boolean", "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'", "default": false }, "allowExternalContentLinks": { "type": "boolean", "description": "Allows the crawler to follow links to external websites.", "default": false } } }, "pageOptions": { "type": "object", "properties": { "headers": { "type": "object", "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc." }, "includeHtml": { "type": "boolean", "description": "Include the HTML version of the content on page. Will output a html key in the response.", "default": false }, "includeRawHtml": { "type": "boolean", "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", "default": false }, "onlyIncludeTags": { "type": "array", "items": { "type": "string" }, "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'" }, "onlyMainContent": { "type": "boolean", "description": "Only return the main content of the page excluding headers, navs, footers, etc.", "default": false }, "removeTags": { "type": "array", "items": { "type": "string" }, "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'" }, "replaceAllPathsWithAbsolutePaths": { "type": "boolean", "description": "Replace all relative paths with absolute paths for images and links", "default": false }, "screenshot": { "type": "boolean", "description": "Include a screenshot of the top of the page that you are scraping.", "default": false }, "fullPageScreenshot": { "type": "boolean", "description": "Include a full page screenshot of the page that you are scraping.", "default": false }, "waitFor": { "type": "integer", "description": "Wait x amount of milliseconds for the page to load to fetch content", "default": 0 } } } }, "required": ["url"] } } } }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CrawlResponse" } } } }, "402": { "description": "Payment required", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Payment required to access this resource." } } } } } }, "429": { "description": "Too many requests", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Request rate limit exceeded. Please wait and try again later." } } } } } }, "500": { "description": "Server error", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "An unexpected error occurred on the server."
} } } } } } } } }, "/search": { "post": { "summary": "Search for a keyword in Google and return the top page results with markdown content for each page", "operationId": "searchGoogle", "tags": ["Search"], "security": [ { "bearerAuth": [] } ], "requestBody": { "required": true, "content": { "application/json": { "schema": { "type": "object", "properties": { "query": { "type": "string", "description": "The query to search for" }, "pageOptions": { "type": "object", "properties": { "onlyMainContent": { "type": "boolean", "description": "Only return the main content of the page excluding headers, navs, footers, etc.", "default": false }, "fetchPageContent": { "type": "boolean", "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.", "default": true }, "includeHtml": { "type": "boolean", "description": "Include the HTML version of the content on page. Will output a html key in the response.", "default": false }, "includeRawHtml": { "type": "boolean", "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.", "default": false } } }, "searchOptions": { "type": "object", "properties": { "limit": { "type": "integer", "description": "Maximum number of results. Max is 20 during beta." } } } }, "required": ["query"] } } } }, "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "$ref": "#/components/schemas/SearchResponse" } } } }, "402": { "description": "Payment required", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Payment required to access this resource." } } } } } }, "429": { "description": "Too many requests", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Request rate limit exceeded. Please wait and try again later."
} } } } } }, "500": { "description": "Server error", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "An unexpected error occurred on the server." } } } } } } } } }, "/crawl/status/{jobId}": { "get": { "tags": ["Crawl"], "summary": "Get the status of a crawl job", "operationId": "getCrawlStatus", "security": [ { "bearerAuth": [] } ], "parameters": [ { "name": "jobId", "in": "path", "description": "ID of the crawl job", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "type": "object", "properties": { "status": { "type": "string", "description": "Status of the job (completed, active, failed, paused)" }, "current": { "type": "integer", "description": "Current page number" }, "total": { "type": "integer", "description": "Total number of pages" }, "data": { "type": "array", "items": { "$ref": "#/components/schemas/CrawlStatusResponseObj" }, "nullable": true, "description": "Data returned from the job (null when it is in progress)" }, "partial_data": { "type": "array", "items": { "$ref": "#/components/schemas/CrawlStatusResponseObj" }, "description": "Partial documents returned as it is being crawled (streaming). **This feature is currently in alpha - expect breaking changes** When a page is ready, it will append to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. There is a max of 50 items in the array response. The oldest item (top of the array) will be removed when a new item is added to the array." } } } } } }, "402": { "description": "Payment required", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Payment required to access this resource."
} } } } } }, "429": { "description": "Too many requests", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Request rate limit exceeded. Please wait and try again later." } } } } } }, "500": { "description": "Server error", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "An unexpected error occurred on the server." } } } } } } } } }, "/crawl/cancel/{jobId}": { "delete": { "tags": ["Crawl"], "summary": "Cancel a crawl job", "operationId": "cancelCrawlJob", "security": [ { "bearerAuth": [] } ], "parameters": [ { "name": "jobId", "in": "path", "description": "ID of the crawl job", "required": true, "schema": { "type": "string" } } ], "responses": { "200": { "description": "Successful response", "content": { "application/json": { "schema": { "type": "object", "properties": { "status": { "type": "string", "description": "Returns cancelled." } } } } } }, "402": { "description": "Payment required", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Payment required to access this resource." } } } } } }, "429": { "description": "Too many requests", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "Request rate limit exceeded. Please wait and try again later." } } } } } }, "500": { "description": "Server error", "content": { "application/json": { "schema": { "type": "object", "properties": { "error": { "type": "string", "example": "An unexpected error occurred on the server." 
} } } } } } } } } }, "components": { "securitySchemes": { "bearerAuth": { "type": "http", "scheme": "bearer" } }, "schemas": { "ScrapeResponse": { "type": "object", "properties": { "success": { "type": "boolean" }, "warning": { "type": "string", "nullable": true, "description": "Warning message to let you know of any issues." }, "data": { "type": "object", "properties": { "markdown": { "type": "string", "nullable": true, "description": "Markdown content of the page if the `markdown` format was specified (default)" }, "html": { "type": "string", "nullable": true, "description": "HTML version of the content on page if the `html` format was specified" }, "rawHtml": { "type": "string", "nullable": true, "description": "Raw HTML content of the page if the `rawHtml` format was specified" }, "links": { "type": "array", "items": { "type": "string", "format": "uri" }, "nullable": true, "description": "Links on the page if the `links` format was specified" }, "screenshot": { "type": "string", "nullable": true, "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified" }, "metadata": { "type": "object", "properties": { "title": { "type": "string" }, "description": { "type": "string" }, "language": { "type": "string", "nullable": true }, "sourceURL": { "type": "string", "format": "uri" }, "statusCode": { "type": "integer", "description": "The status code of the page" }, "error": { "type": "string", "nullable": true, "description": "The error message of the page" } }, "additionalProperties": { "type": "string" } } } } } }, "CrawlStatusResponseObj": { "type": "object", "properties": { "markdown": { "type": "string", "nullable": true, "description": "Markdown content of the page if the `markdown` format was specified (default)" }, "html": { "type": "string", "nullable": true, "description": "HTML version of the content on page if the `html` format was specified" }, "rawHtml": { "type": "string", "nullable": true, "description": "Raw HTML content of the page if the `rawHtml` format was specified" }, "links": { "type": "array", "items": { "type": "string", "format": "uri" }, "nullable": true, "description": "Links on the page if the `links` format was specified" }, "screenshot": { "type": "string", "nullable": true, "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified" }, "metadata": { "type": "object", "properties": { "title": { "type": "string" }, "description": { "type": "string" }, "language": { "type": "string", "nullable": true }, "sourceURL": { "type": "string", "format": "uri" }, "statusCode": { "type": "integer", "description": "The status code of the page" }, "error": { "type": "string", "nullable": true, "description": "The error message of the page" } }, "additionalProperties": { "type": "string" } } } }, "SearchResponse": { "type": "object", "properties": { "success": { "type": "boolean" }, "data": { "type": "array", "items": { "type": "object", "properties": { "markdown": { "type": "string", "nullable": true, "description": "Markdown content of the page if the `markdown` format was specified (default)" }, "html": { "type": "string", "nullable": true, "description": "HTML version of the content on page if the `html` format was specified" }, "rawHtml": { "type": "string", "nullable": true, "description": "Raw HTML content of the page if the `rawHtml` format was specified" }, "links": { "type": "array", "items": { "type": "string", "format": "uri" }, "nullable": true, "description": "Links on the page if the `links` format was specified" }, "screenshot": { "type": "string", "nullable": true, "description": "URL of the screenshot of the page if the `screenshot` or `screenshot@fullPage` format was specified" }, "metadata": { "type": "object", "properties": { "title": { "type": "string" }, "description": { "type": "string" }, "language": { "type": "string", "nullable": true }, "sourceURL": { "type": "string", "format": "uri" }, "statusCode": { "type": "integer", "description": "The status code of the page" }, "error": { "type": "string", "nullable": true, "description": "The error message of the page" } }, "additionalProperties": { "type": "string" } } } } } } }, "CrawlResponse": { "type": "object", "properties": { "success": { "type": "boolean" }, "id": { "type": "string" }, "url": { "type": "string", "format": "uri" } } } } }, "security": [ { "bearerAuth": [] } ] }