mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
39 lines
1.1 KiB
Python
39 lines
1.1 KiB
Python
from typing import Optional

from fastapi import FastAPI
from fastapi.responses import JSONResponse
from playwright.async_api import async_playwright, Browser
from pydantic import BaseModel
|
|
|
|
# ASGI application object; the lifecycle hooks and the /html route below
# register themselves on it through its decorators.
app = FastAPI()
|
|
|
|
class UrlModel(BaseModel):
    """Request body accepted by the ``/html`` endpoint."""

    # Address of the page to load in the headless browser.
    url: str
    # Optional extra delay applied after navigation. The value is handed to
    # Playwright's ``wait_for_timeout`` unchanged. Declared Optional so that
    # both an omitted field and an explicit JSON ``null`` validate.
    wait: Optional[int] = None
|
|
|
|
|
|
# Shared Chromium instance reused by every request; set by startup_event()
# and released by shutdown_event().
browser: Optional[Browser] = None
# Handle returned by async_playwright().start(). It is kept so the driver
# can be stopped at shutdown instead of being leaked.
_playwright = None


@app.on_event("startup")
async def startup_event():
    """Start the Playwright driver and launch the shared Chromium browser."""
    global browser, _playwright
    _playwright = await async_playwright().start()
    try:
        browser = await _playwright.chromium.launch()
    except Exception:
        # Launch failed: do not leave the driver running behind us.
        await _playwright.stop()
        _playwright = None
        raise


@app.on_event("shutdown")
async def shutdown_event():
    """Close the shared browser and stop the Playwright driver.

    Safe to call even when startup never completed: each handle is only
    released if it was actually created.
    """
    global browser, _playwright
    try:
        if browser is not None:
            await browser.close()
    finally:
        browser = None
        # Stop the driver even if closing the browser raised.
        if _playwright is not None:
            await _playwright.stop()
            _playwright = None
|
|
|
|
|
|
@app.post("/html")
async def root(body: UrlModel):
    """Load ``body.url`` in a fresh browser context and return its HTML.

    Args:
        body: Request payload. ``body.url`` is the page to open. When
            ``body.wait`` is truthy it is passed unchanged to
            ``page.wait_for_timeout`` (which Playwright documents as
            milliseconds) before the content is read.

    Returns:
        A ``JSONResponse`` of the form ``{"content": <page HTML>}``.

    Raises:
        Whatever Playwright raises while navigating or reading the page
        (for example a timeout after 15 s). The exception propagates to the
        caller, but the browser context is always closed first.
    """
    context = await browser.new_context()
    try:
        page = await context.new_page()
        await page.goto(body.url, timeout=15000)  # cap navigation at 15 s
        if body.wait:
            # No unit conversion happens here; the value is used as given.
            await page.wait_for_timeout(body.wait)
        page_content = await page.content()
    finally:
        # Release the context even when navigation fails; otherwise every
        # failed request would leak a context in the shared browser.
        await context.close()

    json_compatible_item_data = {"content": page_content}
    return JSONResponse(content=json_compatible_item_data)
|