diff --git a/apps/python-sdk/build/lib/firecrawl/firecrawl.py b/apps/python-sdk/build/lib/firecrawl/firecrawl.py index ef3eb532..701810ca 100644 --- a/apps/python-sdk/build/lib/firecrawl/firecrawl.py +++ b/apps/python-sdk/build/lib/firecrawl/firecrawl.py @@ -1,5 +1,7 @@ import os +from typing import Any, Dict, Optional import requests +import time class FirecrawlApp: def __init__(self, api_key=None): @@ -7,26 +9,45 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - def scrape_url(self, url, params=None): + + + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: headers = { 'Content-Type': 'application/json', 'Authorization': f'Bearer {self.api_key}' } - json_data = {'url': url} + # Prepare the base scrape parameters with the URL + scrape_params = {'url': url} + + # If there are additional params, process them if params: - json_data.update(params) + # Initialize extractorOptions if present + extractor_options = params.get('extractorOptions', {}) + # Check and convert the extractionSchema if it's a Pydantic model + if 'extractionSchema' in extractor_options: + if hasattr(extractor_options['extractionSchema'], 'schema'): + extractor_options['extractionSchema'] = extractor_options['extractionSchema'].schema() + # Ensure 'mode' is set, defaulting to 'llm-extraction' if not explicitly provided + extractor_options['mode'] = extractor_options.get('mode', 'llm-extraction') + # Update the scrape_params with the processed extractorOptions + scrape_params['extractorOptions'] = extractor_options + + # Include any other params directly at the top level of scrape_params + for key, value in params.items(): + if key != 'extractorOptions': + scrape_params[key] = value + # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', headers=headers, - json=json_data + json=scrape_params ) if response.status_code == 200: response = response.json() - if response['success'] == True: + if response['success']: return response['data'] else: raise Exception(f'Failed to scrape URL. Error: {response["error"]}') - elif response.status_code in [402, 409, 500]: error_message = response.json().get('error', 'Unknown error occurred') raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}') @@ -88,11 +109,23 @@ class FirecrawlApp: 'Authorization': f'Bearer {self.api_key}' } - def _post_request(self, url, data, headers): - return requests.post(url, headers=headers, json=data) + def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.post(url, headers=headers, json=data) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response - def _get_request(self, url, headers): - return requests.get(url, headers=headers) + def _get_request(self, url, headers, retries=3, backoff_factor=0.5): + for attempt in range(retries): + response = requests.get(url, headers=headers) + if response.status_code == 502: + time.sleep(backoff_factor * (2 ** attempt)) + else: + return response + return response def _monitor_job_status(self, job_id, headers, timeout): import time diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz deleted file mode 100644 index c1b4206e..00000000 Binary files a/apps/python-sdk/dist/firecrawl-py-0.0.6.tar.gz and /dev/null differ diff --git a/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz b/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz new file mode 100644 index 00000000..b18dde53 Binary files /dev/null and b/apps/python-sdk/dist/firecrawl-py-0.0.8.tar.gz differ diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl deleted file mode 100644 index 5aba5618..00000000 Binary files a/apps/python-sdk/dist/firecrawl_py-0.0.6-py3-none-any.whl and /dev/null differ diff --git a/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl b/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl new file mode 100644 index 00000000..f71cb8e1 Binary files /dev/null and b/apps/python-sdk/dist/firecrawl_py-0.0.8-py3-none-any.whl differ diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index e955ffef..701810ca 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -9,12 +9,7 @@ class FirecrawlApp: if self.api_key is None: raise ValueError('No API key provided') - from pydantic import BaseModel - from typing import Optional, Dict, Any - - class ScrapeParams(BaseModel): - url: str - extractorOptions: Optional[Dict[str, Any]] = None + def scrape_url(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: headers = { @@ -41,7 +36,6 @@ class FirecrawlApp: for key, value in params.items(): if key != 'extractorOptions': scrape_params[key] = value - print(scrape_params) # Make the POST request with the prepared headers and JSON data response = requests.post( 'https://api.firecrawl.dev/v0/scrape', diff --git a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO index 61589c22..e54fda5c 100644 --- a/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO +++ b/apps/python-sdk/firecrawl_py.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: firecrawl-py -Version: 0.0.6 +Version: 0.0.8 Summary: Python SDK for Firecrawl API Home-page: https://github.com/mendableai/firecrawl Author: Mendable.ai diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py index b870da64..78a4d84c 100644 --- a/apps/python-sdk/setup.py +++ b/apps/python-sdk/setup.py @@ -2,12 +2,12 @@ from setuptools import setup, find_packages setup( name='firecrawl-py', - version='0.0.7', + version='0.0.8', url='https://github.com/mendableai/firecrawl', author='Mendable.ai', author_email='nick@mendable.ai', description='Python SDK for Firecrawl API', - packages=find_packages(), + packages=find_packages(), install_requires=[ 'requests', ],