From 397769c7e3579ca2709f127642def7a040249c58 Mon Sep 17 00:00:00 2001 From: rafaelsideguide <150964962+rafaelsideguide@users.noreply.github.com> Date: Fri, 24 May 2024 17:56:27 -0300 Subject: [PATCH] added python sdk e2e tests with pytest some of them are still missing though --- apps/python-sdk/README.md | 19 ++++ .../__tests__/e2e_withAuth/__init__.py | 0 .../firecrawl/__tests__/e2e_withAuth/test.py | 96 +++++++++++++++++++ apps/python-sdk/setup.py | 1 + 4 files changed, 116 insertions(+) create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py create mode 100644 apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py diff --git a/apps/python-sdk/README.md b/apps/python-sdk/README.md index 38ca843b..ae099738 100644 --- a/apps/python-sdk/README.md +++ b/apps/python-sdk/README.md @@ -117,6 +117,25 @@ status = app.check_crawl_status(job_id) The SDK handles errors returned by the Firecrawl API and raises appropriate exceptions. If an error occurs during a request, an exception will be raised with a descriptive error message. +## Running the Tests with Pytest + +To ensure the functionality of the Firecrawl Python SDK, we have included end-to-end tests using `pytest`. These tests cover various aspects of the SDK, including URL scraping, web searching, and website crawling. + +### Running the Tests + +To run the tests, execute the following commands: + +Install pytest: +```bash +pip install pytest +``` + +Run: +```bash +pytest firecrawl/__tests__/e2e_withAuth/test.py +``` + + ## Contributing Contributions to the Firecrawl Python SDK are welcome! If you find any issues or have suggestions for improvements, please open an issue or submit a pull request on the GitHub repository. 
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
new file mode 100644
index 00000000..11b66e98
--- /dev/null
+++ b/apps/python-sdk/firecrawl/__tests__/e2e_withAuth/test.py
@@ -0,0 +1,128 @@
+"""End-to-end tests for the Firecrawl Python SDK.
+
+The tests exercise FirecrawlApp against the Firecrawl API. Set the
+TEST_API_KEY environment variable to a valid key before running them;
+when it is unset the placeholder key below is used.
+"""
+import os
+
+import pytest
+from firecrawl import FirecrawlApp
+
+# Read the key from the environment so a real key never has to be committed.
+TEST_API_KEY = os.getenv("TEST_API_KEY", "fc-YOUR_API_KEY")
+TEST_URL = "https://firecrawl.dev"
+
+
+def test_scrape_url_e2e():
+    """Scrape TEST_URL and check the returned content."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.scrape_url(TEST_URL)
+    assert response is not None
+    assert 'content' in response
+    assert "🔥 Firecrawl" in response['content']
+
+
+def test_scrape_url_invalid_api_key():
+    """Scraping with an invalid API key raises an error reporting status 401."""
+    invalid_app = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as excinfo:
+        invalid_app.scrape_url(TEST_URL)
+    assert "Failed to scrape URL. Status code: 401" in str(excinfo.value)
+
+
+def test_crawl_url_e2e():
+    """Crawl TEST_URL, excluding 'blog/*', and check the first result's content."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.crawl_url(TEST_URL, {'crawlerOptions': {'excludes': ['blog/*']}}, True)
+    assert response is not None
+    assert len(response) > 0
+    assert 'content' in response[0]
+    assert "🔥 Firecrawl" in response[0]['content']
+
+
+def test_crawl_url_invalid_api_key():
+    """Crawling with an invalid API key raises an error reporting status 401."""
+    invalid_app = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as excinfo:
+        invalid_app.crawl_url(TEST_URL)
+    assert "Unexpected error occurred while trying to start crawl job. Status code: 401" in str(excinfo.value)
+
+
+def test_search_e2e():
+    """Search for a query and check the result count and first result."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.search("test query")
+    assert response is not None
+    # Check the result count before indexing so a short result list fails
+    # with a clear assertion instead of an IndexError.
+    assert len(response) > 2
+    assert 'content' in response[0]
+
+
+def test_search_invalid_api_key():
+    """Searching with an invalid API key raises an error reporting status 401."""
+    invalid_app = FirecrawlApp(api_key="invalid_api_key")
+    with pytest.raises(Exception) as excinfo:
+        invalid_app.search("test query")
+    assert "Failed to search. Status code: 401" in str(excinfo.value)
+
+
+def test_crawl_with_fast_mode():
+    """Crawl TEST_URL with crawler mode 'fast' and check the first result."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.crawl_url(TEST_URL, {'crawlerOptions': {'mode': 'fast'}}, True)
+    assert response is not None
+    assert len(response) > 0
+    assert 'content' in response[0]
+
+
+def test_crawl_with_html_inclusion():
+    """Crawl with the includeHtml page option and check a jobId is returned."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.crawl_url(TEST_URL, {'pageOptions': {'includeHtml': True}}, False)
+    assert response is not None
+    assert 'jobId' in response
+
+
+def test_crawl_with_pdf_extraction():
+    """Crawl an arXiv PDF URL and check a jobId is returned."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.crawl_url("https://arxiv.org/pdf/astro-ph/9301001",
+                             {'crawlerOptions': {'limit': 10, 'excludes': ['list/*', 'login', 'abs/*', 'static/*', 'about/*', 'archive/*']}}, False)
+    assert response is not None
+    assert 'jobId' in response
+
+
+def test_timeout_during_scraping():
+    """Scraping with a 'timeout' option of 1000 raises an error reporting status 408."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    with pytest.raises(Exception) as excinfo:
+        app.scrape_url(TEST_URL, {'timeout': 1000})
+    assert 'Failed to scrape URL. Status code: 408' in str(excinfo.value)
+
+
+def test_llm_extraction():
+    """Scrape with LLM extraction options and check the extracted fields."""
+    app = FirecrawlApp(api_key=TEST_API_KEY)
+    response = app.scrape_url("https://mendable.ai", {
+        'extractorOptions': {
+            'mode': 'llm-extraction',
+            'extractionPrompt': "Based on the information on the page, find what the company's mission is and whether it supports SSO, and whether it is open source",
+            'extractionSchema': {
+                'type': 'object',
+                'properties': {
+                    'company_mission': {'type': 'string'},
+                    'supports_sso': {'type': 'boolean'},
+                    'is_open_source': {'type': 'boolean'}
+                },
+                'required': ['company_mission', 'supports_sso', 'is_open_source']
+            }
+        }
+    })
+    assert response is not None
+    assert 'llm_extraction' in response
+    llm_extraction = response['llm_extraction']
+    assert 'company_mission' in llm_extraction
+    assert isinstance(llm_extraction['supports_sso'], bool)
+    assert isinstance(llm_extraction['is_open_source'], bool)
diff --git a/apps/python-sdk/setup.py b/apps/python-sdk/setup.py
index 7df520eb..6674a892 100644
--- a/apps/python-sdk/setup.py
+++ b/apps/python-sdk/setup.py
@@ -10,5 +10,8 @@ setup(
     packages=find_packages(),
     install_requires=[
         'requests',
     ],
+    extras_require={
+        'test': ['pytest'],
+    },
 )