mirror of
https://github.com/mendableai/firecrawl.git
synced 2024-11-16 11:42:24 +08:00
Merge pull request #253 from mattjoyce/py-sdk-improve-response-handling
Python sdk improve response handling
This commit is contained in:
commit
5fd228f9ec
|
@ -13,7 +13,7 @@ import os
|
||||||
|
|
||||||
from .firecrawl import FirecrawlApp
|
from .firecrawl import FirecrawlApp
|
||||||
|
|
||||||
__version__ = "0.0.15"
|
__version__ = "0.0.16"
|
||||||
|
|
||||||
# Define the logger for the Firecrawl project
|
# Define the logger for the Firecrawl project
|
||||||
logger: logging.Logger = logging.getLogger("firecrawl")
|
logger: logging.Logger = logging.getLogger("firecrawl")
|
||||||
|
|
|
@ -27,14 +27,14 @@ def test_scrape_url_invalid_api_key():
|
||||||
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
invalid_app.scrape_url('https://firecrawl.dev')
|
invalid_app.scrape_url('https://firecrawl.dev')
|
||||||
assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
|
assert "Unexpected error during scrape URL: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
|
||||||
|
|
||||||
def test_blocklisted_url():
|
def test_blocklisted_url():
|
||||||
blocklisted_url = "https://facebook.com/fake-test"
|
blocklisted_url = "https://facebook.com/fake-test"
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
app.scrape_url(blocklisted_url)
|
app.scrape_url(blocklisted_url)
|
||||||
assert "Failed to scrape URL. Status code: 403" in str(excinfo.value)
|
assert "Unexpected error during scrape URL: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
|
||||||
|
|
||||||
def test_successful_response_with_valid_preview_token():
|
def test_successful_response_with_valid_preview_token():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
app = FirecrawlApp(api_url=API_URL, api_key="this_is_just_a_preview_token")
|
||||||
|
@ -86,14 +86,14 @@ def test_crawl_url_invalid_api_key():
|
||||||
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
invalid_app.crawl_url('https://firecrawl.dev')
|
invalid_app.crawl_url('https://firecrawl.dev')
|
||||||
assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
|
assert "Unexpected error during start crawl job: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
|
||||||
|
|
||||||
def test_should_return_error_for_blocklisted_url():
|
def test_should_return_error_for_blocklisted_url():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
blocklisted_url = "https://twitter.com/fake-test"
|
blocklisted_url = "https://twitter.com/fake-test"
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
app.crawl_url(blocklisted_url)
|
app.crawl_url(blocklisted_url)
|
||||||
assert "Unexpected error occurred while trying to start crawl job. Status code: 403" in str(excinfo.value)
|
assert "Unexpected error during start crawl job: Status code 403. Firecrawl currently does not support social media scraping due to policy restrictions. We're actively working on building support for it." in str(excinfo.value)
|
||||||
|
|
||||||
def test_crawl_url_wait_for_completion_e2e():
|
def test_crawl_url_wait_for_completion_e2e():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
@ -114,7 +114,7 @@ def test_crawl_url_with_idempotency_key_e2e():
|
||||||
|
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
app.crawl_url('https://firecrawl.dev', {'crawlerOptions': {'excludes': ['blog/*']}}, True, 2, uniqueIdempotencyKey)
|
||||||
assert "Failed to start crawl job. Status code: 409. Error: Idempotency key already used" in str(excinfo.value)
|
assert "Conflict: Failed to start crawl job due to a conflict. Idempotency key already used" in str(excinfo.value)
|
||||||
|
|
||||||
def test_check_crawl_status_e2e():
|
def test_check_crawl_status_e2e():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
@ -141,7 +141,7 @@ def test_search_invalid_api_key():
|
||||||
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
invalid_app = FirecrawlApp(api_url=API_URL, api_key="invalid_api_key")
|
||||||
with pytest.raises(Exception) as excinfo:
|
with pytest.raises(Exception) as excinfo:
|
||||||
invalid_app.search("test query")
|
invalid_app.search("test query")
|
||||||
assert "Failed to search. Status code: 401" in str(excinfo.value)
|
assert "Unexpected error during search: Status code 401. Unauthorized: Invalid token" in str(excinfo.value)
|
||||||
|
|
||||||
def test_llm_extraction():
|
def test_llm_extraction():
|
||||||
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
app = FirecrawlApp(api_url=API_URL, api_key=TEST_API_KEY)
|
||||||
|
|
|
@ -53,10 +53,8 @@ class FirecrawlApp:
|
||||||
Exception: If the scrape request fails.
|
Exception: If the scrape request fails.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
headers = {
|
headers = self._prepare_headers()
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': f'Bearer {self.api_key}'
|
|
||||||
}
|
|
||||||
# Prepare the base scrape parameters with the URL
|
# Prepare the base scrape parameters with the URL
|
||||||
scrape_params = {'url': url}
|
scrape_params = {'url': url}
|
||||||
|
|
||||||
|
@ -89,13 +87,10 @@ class FirecrawlApp:
|
||||||
return response['data']
|
return response['data']
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
raise Exception(f'Failed to scrape URL. Error: {response["error"]}')
|
||||||
elif response.status_code in [402, 408, 409, 500]:
|
|
||||||
error_message = response.json().get('error', 'Unknown error occurred')
|
|
||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}. Error: {error_message}')
|
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to scrape URL. Status code: {response.status_code}')
|
self._handle_error(response, 'scrape URL')
|
||||||
|
|
||||||
def search(self, query, params=None):
|
def search(self, query: str, params: Optional[Dict[str, Any]] = None) -> Any:
|
||||||
"""
|
"""
|
||||||
Perform a search using the Firecrawl API.
|
Perform a search using the Firecrawl API.
|
||||||
|
|
||||||
|
@ -109,10 +104,7 @@ class FirecrawlApp:
|
||||||
Raises:
|
Raises:
|
||||||
Exception: If the search request fails.
|
Exception: If the search request fails.
|
||||||
"""
|
"""
|
||||||
headers = {
|
headers = self._prepare_headers()
|
||||||
'Content-Type': 'application/json',
|
|
||||||
'Authorization': f'Bearer {self.api_key}'
|
|
||||||
}
|
|
||||||
json_data = {'query': query}
|
json_data = {'query': query}
|
||||||
if params:
|
if params:
|
||||||
json_data.update(params)
|
json_data.update(params)
|
||||||
|
@ -129,13 +121,14 @@ class FirecrawlApp:
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to search. Error: {response["error"]}')
|
raise Exception(f'Failed to search. Error: {response["error"]}')
|
||||||
|
|
||||||
elif response.status_code in [402, 409, 500]:
|
|
||||||
error_message = response.json().get('error', 'Unknown error occurred')
|
|
||||||
raise Exception(f'Failed to search. Status code: {response.status_code}. Error: {error_message}')
|
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Failed to search. Status code: {response.status_code}')
|
self._handle_error(response, 'search')
|
||||||
|
|
||||||
def crawl_url(self, url, params=None, wait_until_done=True, poll_interval=2, idempotency_key=None):
|
def crawl_url(self, url: str,
|
||||||
|
params: Optional[Dict[str, Any]] = None,
|
||||||
|
wait_until_done: bool = True,
|
||||||
|
poll_interval: int = 2,
|
||||||
|
idempotency_key: Optional[str] = None) -> Any:
|
||||||
"""
|
"""
|
||||||
Initiate a crawl job for the specified URL using the Firecrawl API.
|
Initiate a crawl job for the specified URL using the Firecrawl API.
|
||||||
|
|
||||||
|
@ -166,7 +159,7 @@ class FirecrawlApp:
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'start crawl job')
|
self._handle_error(response, 'start crawl job')
|
||||||
|
|
||||||
def check_crawl_status(self, job_id):
|
def check_crawl_status(self, job_id: str) -> Any:
|
||||||
"""
|
"""
|
||||||
Check the status of a crawl job using the Firecrawl API.
|
Check the status of a crawl job using the Firecrawl API.
|
||||||
|
|
||||||
|
@ -186,7 +179,7 @@ class FirecrawlApp:
|
||||||
else:
|
else:
|
||||||
self._handle_error(response, 'check crawl status')
|
self._handle_error(response, 'check crawl status')
|
||||||
|
|
||||||
def _prepare_headers(self, idempotency_key=None):
|
def _prepare_headers(self, idempotency_key: Optional[str] = None) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Prepare the headers for API requests.
|
Prepare the headers for API requests.
|
||||||
|
|
||||||
|
@ -208,7 +201,11 @@ class FirecrawlApp:
|
||||||
'Authorization': f'Bearer {self.api_key}',
|
'Authorization': f'Bearer {self.api_key}',
|
||||||
}
|
}
|
||||||
|
|
||||||
def _post_request(self, url, data, headers, retries=3, backoff_factor=0.5):
|
def _post_request(self, url: str,
|
||||||
|
data: Dict[str, Any],
|
||||||
|
headers: Dict[str, str],
|
||||||
|
retries: int = 3,
|
||||||
|
backoff_factor: float = 0.5) -> requests.Response:
|
||||||
"""
|
"""
|
||||||
Make a POST request with retries.
|
Make a POST request with retries.
|
||||||
|
|
||||||
|
@ -233,7 +230,10 @@ class FirecrawlApp:
|
||||||
return response
|
return response
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _get_request(self, url, headers, retries=3, backoff_factor=0.5):
|
def _get_request(self, url: str,
|
||||||
|
headers: Dict[str, str],
|
||||||
|
retries: int = 3,
|
||||||
|
backoff_factor: float = 0.5) -> requests.Response:
|
||||||
"""
|
"""
|
||||||
Make a GET request with retries.
|
Make a GET request with retries.
|
||||||
|
|
||||||
|
@ -257,7 +257,7 @@ class FirecrawlApp:
|
||||||
return response
|
return response
|
||||||
return response
|
return response
|
||||||
|
|
||||||
def _monitor_job_status(self, job_id, headers, poll_interval):
|
def _monitor_job_status(self, job_id: str, headers: Dict[str, str], poll_interval: int) -> Any:
|
||||||
"""
|
"""
|
||||||
Monitor the status of a crawl job until completion.
|
Monitor the status of a crawl job until completion.
|
||||||
|
|
||||||
|
@ -289,7 +289,7 @@ class FirecrawlApp:
|
||||||
else:
|
else:
|
||||||
self._handle_error(status_response, 'check crawl status')
|
self._handle_error(status_response, 'check crawl status')
|
||||||
|
|
||||||
def _handle_error(self, response, action):
|
def _handle_error(self, response: requests.Response, action: str) -> None:
|
||||||
"""
|
"""
|
||||||
Handle errors from API responses.
|
Handle errors from API responses.
|
||||||
|
|
||||||
|
@ -300,8 +300,19 @@ class FirecrawlApp:
|
||||||
Raises:
|
Raises:
|
||||||
Exception: An exception with a message containing the status code and error details from the response.
|
Exception: An exception with a message containing the status code and error details from the response.
|
||||||
"""
|
"""
|
||||||
if response.status_code in [402, 408, 409, 500]:
|
error_message = response.json().get('error', 'No additional error details provided.')
|
||||||
error_message = response.json().get('error', 'Unknown error occurred')
|
|
||||||
raise Exception(f'Failed to {action}. Status code: {response.status_code}. Error: {error_message}')
|
if response.status_code == 402:
|
||||||
|
message = f"Payment Required: Failed to {action}. {error_message}"
|
||||||
|
elif response.status_code == 408:
|
||||||
|
message = f"Request Timeout: Failed to {action} as the request timed out. {error_message}"
|
||||||
|
elif response.status_code == 409:
|
||||||
|
message = f"Conflict: Failed to {action} due to a conflict. {error_message}"
|
||||||
|
elif response.status_code == 500:
|
||||||
|
message = f"Internal Server Error: Failed to {action}. {error_message}"
|
||||||
else:
|
else:
|
||||||
raise Exception(f'Unexpected error occurred while trying to {action}. Status code: {response.status_code}')
|
message = f"Unexpected error during {action}: Status code {response.status_code}. {error_message}"
|
||||||
|
|
||||||
|
# Raise an HTTPError with the custom message and attach the response
|
||||||
|
raise requests.exceptions.HTTPError(message, response=response)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user