mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 03:32:22 +08:00
Various Linting
Pylint fixes: C0114 (missing module docstring), C0115 (missing class docstring), C0116 (missing function or method docstring), C0303 (trailing whitespace); plus import ordering.
This commit is contained in:
parent
2a39b5382b
commit
c516140bfb
|
@ -1,9 +1,15 @@
|
||||||
from fastapi import FastAPI
|
"""
|
||||||
from playwright.async_api import async_playwright, Browser
|
This module provides a FastAPI application that uses Playwright to fetch and return
|
||||||
from fastapi.responses import JSONResponse
|
the HTML content of a specified URL. It supports optional proxy settings and media blocking.
|
||||||
from pydantic import BaseModel
|
"""
|
||||||
|
|
||||||
from os import environ
|
from os import environ
|
||||||
|
|
||||||
|
from fastapi import FastAPI
|
||||||
|
from fastapi.responses import JSONResponse
|
||||||
|
from playwright.async_api import Browser, async_playwright
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
PROXY_SERVER = environ.get('PROXY_SERVER', None)
|
PROXY_SERVER = environ.get('PROXY_SERVER', None)
|
||||||
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
|
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
|
||||||
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
|
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
|
||||||
|
@ -11,30 +17,37 @@ BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() == 'TRUE'
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
|
||||||
class UrlModel(BaseModel):
|
class UrlModel(BaseModel):
|
||||||
|
"""Model representing the URL and associated parameters for the request."""
|
||||||
url: str
|
url: str
|
||||||
wait: int = None
|
wait: int = None
|
||||||
timeout: int = 15000
|
timeout: int = 15000
|
||||||
|
|
||||||
|
|
||||||
browser: Browser = None
|
browser: Browser = None
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("startup")
|
@app.on_event("startup")
|
||||||
async def startup_event():
|
async def startup_event():
|
||||||
|
"""Event handler for application startup to initialize the browser."""
|
||||||
global browser
|
global browser
|
||||||
playwright = await async_playwright().start()
|
playwright = await async_playwright().start()
|
||||||
browser = await playwright.chromium.launch()
|
browser = await playwright.chromium.launch()
|
||||||
|
|
||||||
|
|
||||||
@app.on_event("shutdown")
|
@app.on_event("shutdown")
|
||||||
async def shutdown_event():
|
async def shutdown_event():
|
||||||
|
"""Event handler for application shutdown to close the browser."""
|
||||||
await browser.close()
|
await browser.close()
|
||||||
|
|
||||||
|
|
||||||
@app.post("/html")
|
@app.post("/html")
|
||||||
async def root(body: UrlModel):
|
async def root(body: UrlModel):
|
||||||
|
"""
|
||||||
|
Endpoint to fetch and return HTML content of a given URL.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
body (UrlModel): The URL model containing the target URL, wait time, and timeout.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
JSONResponse: The HTML content of the page.
|
||||||
|
"""
|
||||||
context = None
|
context = None
|
||||||
if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
|
if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
|
||||||
context = await browser.new_context(proxy={"server": PROXY_SERVER,
|
context = await browser.new_context(proxy={"server": PROXY_SERVER,
|
||||||
|
@ -56,7 +69,7 @@ async def root(body: UrlModel):
|
||||||
# Wait != timeout. Wait is the time to wait after the page is loaded - useful in some cases where "load" / "networkidle" is not enough
|
# Wait != timeout. Wait is the time to wait after the page is loaded - useful in some cases where "load" / "networkidle" is not enough
|
||||||
if body.wait:
|
if body.wait:
|
||||||
await page.wait_for_timeout(body.wait)
|
await page.wait_for_timeout(body.wait)
|
||||||
|
|
||||||
page_content = await page.content()
|
page_content = await page.content()
|
||||||
await context.close()
|
await context.close()
|
||||||
json_compatible_item_data = {"content": page_content}
|
json_compatible_item_data = {"content": page_content}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user