firecrawl/apps/api/openapi.json

{
  "openapi": "3.0.0",
  "info": {
    "title": "Firecrawl API",
    "version": "1.0.0",
    "description": "API for interacting with Firecrawl services to perform web scraping and crawling tasks.",
    "contact": {
      "name": "Firecrawl Support",
      "url": "https://firecrawl.dev/support",
      "email": "support@firecrawl.dev"
    }
  },
  "servers": [
    {
      "url": "https://api.firecrawl.dev/v0"
    }
  ],
  "paths": {
    "/scrape": {
      "post": {
        "summary": "Scrape a single URL and optionally extract information using an LLM",
        "operationId": "scrapeAndExtractFromUrl",
        "tags": ["Scraping"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The URL to scrape"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Number of milliseconds to wait for the page to load before fetching its content",
                        "default": 0
                      }
                    }
                  },
                  "extractorOptions": {
                    "type": "object",
                    "description": "Options for extraction of structured information from the page content. Note: LLM-based extraction is not performed by default and only occurs when explicitly configured. The 'markdown' mode simply returns the scraped markdown and is the default mode for scraping.",
                    "default": {},
                    "properties": {
                      "mode": {
                        "type": "string",
                        "enum": ["markdown", "llm-extraction", "llm-extraction-from-raw-html", "llm-extraction-from-markdown"],
                        "description": "The extraction mode to use. 'markdown': Returns the scraped markdown content, does not perform LLM extraction. 'llm-extraction': Extracts information from the cleaned and parsed content using LLM. 'llm-extraction-from-raw-html': Extracts information directly from the raw HTML using LLM. 'llm-extraction-from-markdown': Extracts information from the markdown content using LLM."
                      },
                      "extractionPrompt": {
                        "type": "string",
                        "description": "A prompt describing what information to extract from the page, applicable for LLM extraction modes."
                      },
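                      "extractionSchema": {
                        "type": "object",
                        "additionalProperties": true,
                        "description": "The schema for the data to be extracted, required only for LLM extraction modes.",
                        "example": {
                          "type": "object",
                          "properties": {
                            "company_mission": {
                              "type": "string"
                            },
                            "supports_sso": {
                              "type": "boolean"
                            },
                            "is_open_source": {
                              "type": "boolean"
                            }
                          },
                          "required": [
                            "company_mission",
                            "supports_sso",
                            "is_open_source"
                          ]
                        }
                      }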
                    }
                  },
                  "timeout": {
                    "type": "integer",
                    "description": "Timeout in milliseconds for the request",
                    "default": 30000
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/ScrapeResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl": {
      "post": {
        "summary": "Crawl multiple URLs based on options",
        "operationId": "crawlUrls",
        "tags": ["Crawling"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "url": {
                    "type": "string",
                    "format": "uri",
                    "description": "The base URL to start crawling from"
                  },
                  "crawlerOptions": {
                    "type": "object",
                    "properties": {
                      "includes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to include"
                      },
                      "excludes": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "URL patterns to exclude"
                      },
                      "generateImgAltText": {
                        "type": "boolean",
                        "description": "Generate alt text for images using LLMs (must have a paid plan)",
                        "default": false
                      },
                      "returnOnlyUrls": {
                        "type": "boolean",
                        "description": "If true, returns only the URLs as a list on the crawl status. Attention: the response will contain a list of URLs inside the data, not a list of documents.",
                        "default": false
                      },
                      "maxDepth": {
                        "type": "integer",
                        "description": "Maximum depth to crawl relative to the entered URL. A maxDepth of 0 scrapes only the entered URL. A maxDepth of 1 scrapes the entered URL and all pages one level deep. A maxDepth of 2 scrapes the entered URL and all pages up to two levels deep. Higher values follow the same pattern."
                      },
                      "mode": {
                        "type": "string",
                        "enum": ["default", "fast"],
                        "description": "The crawling mode to use. Fast mode crawls websites without a sitemap 4x faster, but may not be as accurate and shouldn't be used on heavily JS-rendered websites.",
                        "default": "default"
                      },
                      "ignoreSitemap": {
                        "type": "boolean",
                        "description": "Ignore the website sitemap when crawling",
                        "default": false
                      },
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of pages to crawl",
                        "default": 10000
                      },
                      "allowBackwardCrawling": {
                        "type": "boolean",
                        "description": "Enables the crawler to navigate from a specific URL to previously linked pages. For instance, from 'example.com/product/123' back to 'example.com/product'",
                        "default": false
                      },
                      "allowExternalContentLinks": {
                        "type": "boolean",
                        "description": "Allows the crawler to follow links to external websites.",
                        "default": false
                      }
                    }
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "headers": {
                        "type": "object",
                        "description": "Headers to send with the request. Can be used to send cookies, user-agent, etc."
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      },
                      "onlyIncludeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Only include tags, classes and ids from the page in the final output. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "removeTags": {
                        "type": "array",
                        "items": {
                          "type": "string"
                        },
                        "description": "Tags, classes and ids to remove from the page. Use comma separated values. Example: 'script, .ad, #footer'"
                      },
                      "replaceAllPathsWithAbsolutePaths": {
                        "type": "boolean",
                        "description": "Replace all relative paths with absolute paths for images and links",
                        "default": false
                      },
                      "screenshot": {
                        "type": "boolean",
                        "description": "Include a screenshot of the top of the page that you are scraping.",
                        "default": false
                      },
                      "waitFor": {
                        "type": "integer",
                        "description": "Number of milliseconds to wait for the page to load before fetching its content",
                        "default": 0
                      }
                    }
                  }
                },
                "required": ["url"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CrawlResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/search": {
      "post": {
        "summary": "Search for a keyword in Google and return the top page results with markdown content for each page",
        "operationId": "searchGoogle",
        "tags": ["Search"],
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "requestBody": {
          "required": true,
          "content": {
            "application/json": {
              "schema": {
                "type": "object",
                "properties": {
                  "query": {
                    "type": "string",
                    "description": "The query to search for"
                  },
                  "pageOptions": {
                    "type": "object",
                    "properties": {
                      "onlyMainContent": {
                        "type": "boolean",
                        "description": "Only return the main content of the page excluding headers, navs, footers, etc.",
                        "default": false
                      },
                      "fetchPageContent": {
                        "type": "boolean",
                        "description": "Fetch the content of each page. If false, defaults to a basic fast serp API.",
                        "default": true
                      },
                      "includeHtml": {
                        "type": "boolean",
                        "description": "Include the HTML version of the content on page. Will output a html key in the response.",
                        "default": false
                      },
                      "includeRawHtml": {
                        "type": "boolean",
                        "description": "Include the raw HTML content of the page. Will output a rawHtml key in the response.",
                        "default": false
                      }
                    }
                  },
                  "searchOptions": {
                    "type": "object",
                    "properties": {
                      "limit": {
                        "type": "integer",
                        "description": "Maximum number of results. Max is 20 during beta."
                      }
                    }
                  }
                },
                "required": ["query"]
              }
            }
          }
        },
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/SearchResponse"
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/status/{jobId}": {
      "get": {
        "tags": ["Crawling"],
        "summary": "Get the status of a crawl job",
        "operationId": "getCrawlStatus",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Status of the job (completed, active, failed, paused)"
                    },
                    "current": {
                      "type": "integer",
                      "description": "Current page number"
                    },
                    "total": {
                      "type": "integer",
                      "description": "Total number of pages"
                    },
                    "data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Data returned from the job (null when it is in progress)"
                    },
                    "partial_data": {
                      "type": "array",
                      "items": {
                        "$ref": "#/components/schemas/CrawlStatusResponseObj"
                      },
                      "description": "Partial documents returned as the site is being crawled (streaming). **This feature is currently in alpha - expect breaking changes.** When a page is ready, it is appended to the partial_data array, so there is no need to wait for the entire website to be crawled. When the crawl is done, partial_data will become empty and the result will be available in `data`. The array holds a maximum of 50 items; when a new item is added, the oldest item (top of the array) is removed."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    },
    "/crawl/cancel/{jobId}": {
      "delete": {
        "tags": ["Crawling"],
        "summary": "Cancel a crawl job",
        "operationId": "cancelCrawlJob",
        "security": [
          {
            "bearerAuth": []
          }
        ],
        "parameters": [
          {
            "name": "jobId",
            "in": "path",
            "description": "ID of the crawl job",
            "required": true,
            "schema": {
              "type": "string"
            }
          }
        ],
        "responses": {
          "200": {
            "description": "Successful response",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "status": {
                      "type": "string",
                      "description": "Returns cancelled."
                    }
                  }
                }
              }
            }
          },
          "402": {
            "description": "Payment required",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Payment required to access this resource."
                    }
                  }
                }
              }
            }
          },
          "429": {
            "description": "Too many requests",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "Request rate limit exceeded. Please wait and try again later."
                    }
                  }
                }
              }
            }
          },
          "500": {
            "description": "Server error",
            "content": {
              "application/json": {
                "schema": {
                  "type": "object",
                  "properties": {
                    "error": {
                      "type": "string",
                      "example": "An unexpected error occurred on the server."
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "components": {
    "securitySchemes": {
      "bearerAuth": {
        "type": "http",
        "scheme": "bearer"
      }
    },
    "schemas": {
      "ScrapeResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "object",
            "properties": {
              "markdown": {
                "type": "string"
              },
              "content": {
                "type": "string"
              },
              "html": {
                "type": "string",
                "nullable": true,
                "description": "HTML version of the content on page if `includeHtml` is true"
              },
              "rawHtml": {
                "type": "string",
                "nullable": true,
                "description": "Raw HTML content of the page if `includeRawHtml` is true"
              },
              "metadata": {
                "type": "object",
                "properties": {
                  "title": {
                    "type": "string"
                  },
                  "description": {
                    "type": "string"
                  },
                  "language": {
                    "type": "string",
                    "nullable": true
                  },
                  "sourceURL": {
                    "type": "string",
                    "format": "uri"
                  },
                  "<any other metadata> ": {
                    "type": "string"
                  },
                  "pageStatusCode": {
                    "type": "integer",
                    "description": "The status code of the page"
                  },
                  "pageError": {
                    "type": "string",
                    "nullable": true,
                    "description": "The error message of the page"
                  }

                }
              },
              "llm_extraction": {
                "type": "object",
                "description": "Displayed when using LLM Extraction. Extracted data from the page following the schema defined.",
                "nullable": true
              },
              "warning": {
                "type": "string",
                "nullable": true,
                "description": "Can be displayed when using LLM Extraction. Warning message will let you know any issues with the extraction."
              }
            }
          }
        }
      },
      "CrawlStatusResponseObj": {
        "type": "object",
        "properties": {
          "markdown": {
            "type": "string"
          },
          "content": {
            "type": "string"
          },
          "html": {
            "type": "string",
            "nullable": true,
            "description": "HTML version of the content on page if `includeHtml` is true"
          },
          "rawHtml": {
            "type": "string",
            "nullable": true,
            "description": "Raw HTML content of the page if `includeRawHtml` is true"
          },
          "index": {
            "type": "integer",
            "description": "The number of the page that was crawled. This is useful for `partial_data` so you know which page the data is from."
          },
          "metadata": {
            "type": "object",
            "properties": {
              "title": {
                "type": "string"
              },
              "description": {
                "type": "string"
              },
              "language": {
                "type": "string",
                "nullable": true
              },
              "sourceURL": {
                "type": "string",
                "format": "uri"
              },
              "<any other metadata> ": {
                "type": "string"
              },
              "pageStatusCode": {
                "type": "integer",
                "description": "The status code of the page"
              },
              "pageError": {
                "type": "string",
                "nullable": true,
                "description": "The error message of the page"
              }
            }
          }
        }
      },
      "SearchResponse": {
        "type": "object",
        "properties": {
          "success": {
            "type": "boolean"
          },
          "data": {
            "type": "array",
            "items": {
              "type": "object",
              "properties": {
                "url": {
                  "type": "string"
                },
                "markdown": {
                  "type": "string"
                },
                "content": {
                  "type": "string"
                },
                "metadata": {
                  "type": "object",
                  "properties": {
                    "title": {
                      "type": "string"
                    },
                    "description": {
                      "type": "string"
                    },
                    "language": {
                      "type": "string",
                      "nullable": true
                    },
                    "sourceURL": {
                      "type": "string",
                      "format": "uri"
                    }
                  }
                }
              }
            }
          }
        }
      },
      "CrawlResponse": {
        "type": "object",
        "properties": {
          "jobId": {
            "type": "string"
          }
        }
      }
    }
  },
  "security": [
    {
      "bearerAuth": []
    }
  ]
}