Various Linting

Pylint
C0114: Missing module docstring
C0115: Missing class docstring
C0116: Missing function or method docstring
C0303: Trailing whitespace
Import ordering
This commit is contained in:
Matt Joyce 2024-06-01 13:24:40 +10:00
parent 2a39b5382b
commit c516140bfb

View File

@ -1,9 +1,15 @@
from fastapi import FastAPI
from playwright.async_api import async_playwright, Browser
from fastapi.responses import JSONResponse
from pydantic import BaseModel
"""
This module provides a FastAPI application that uses Playwright to fetch and return
the HTML content of a specified URL. It supports optional proxy settings and media blocking.
"""
from os import environ
from fastapi import FastAPI
from fastapi.responses import JSONResponse
from playwright.async_api import Browser, async_playwright
from pydantic import BaseModel
PROXY_SERVER = environ.get('PROXY_SERVER', None)
PROXY_USERNAME = environ.get('PROXY_USERNAME', None)
PROXY_PASSWORD = environ.get('PROXY_PASSWORD', None)
@ -11,30 +17,37 @@ BLOCK_MEDIA = environ.get('BLOCK_MEDIA', 'False').upper() == 'TRUE'
app = FastAPI()
class UrlModel(BaseModel):
"""Model representing the URL and associated parameters for the request."""
url: str
wait: int = None
timeout: int = 15000
browser: Browser = None
@app.on_event("startup")
async def startup_event():
"""Event handler for application startup to initialize the browser."""
global browser
playwright = await async_playwright().start()
browser = await playwright.chromium.launch()
@app.on_event("shutdown")
async def shutdown_event():
"""Event handler for application shutdown to close the browser."""
await browser.close()
@app.post("/html")
async def root(body: UrlModel):
"""
Endpoint to fetch and return HTML content of a given URL.
Args:
body (UrlModel): The URL model containing the target URL, wait time, and timeout.
Returns:
JSONResponse: The HTML content of the page.
"""
context = None
if PROXY_SERVER and PROXY_USERNAME and PROXY_PASSWORD:
context = await browser.new_context(proxy={"server": PROXY_SERVER,
@ -56,7 +69,7 @@ async def root(body: UrlModel):
# Wait != timeout. Wait is the time to wait after the page is loaded - useful in some cases were "load" / "networkidle" is not enough
if body.wait:
await page.wait_for_timeout(body.wait)
page_content = await page.content()
await context.close()
json_compatible_item_data = {"content": page_content}