From 896998ef3fc28628a70d07e7ca483b253f3bd436 Mon Sep 17 00:00:00 2001 From: Bowen Liang Date: Tue, 8 Oct 2024 15:11:45 +0800 Subject: [PATCH] chore: refine python dependency list and check dependencies in order (#9061) --- .github/workflows/api-tests.yml | 5 +- api/poetry.lock | 2 +- api/pyproject.toml | 77 ++++++++++--------- .../artifact_tests/dependencies/__init__.py | 0 .../dependencies/test_dependencies_sorted.py | 61 +++++++++++++++ dev/pytest/pytest_artifacts.sh | 4 + dev/sync-poetry | 3 + 7 files changed, 114 insertions(+), 38 deletions(-) create mode 100644 api/tests/artifact_tests/dependencies/__init__.py create mode 100644 api/tests/artifact_tests/dependencies/test_dependencies_sorted.py create mode 100755 dev/pytest/pytest_artifacts.sh diff --git a/.github/workflows/api-tests.yml b/.github/workflows/api-tests.yml index 7c632f8a34..b1cf41a226 100644 --- a/.github/workflows/api-tests.yml +++ b/.github/workflows/api-tests.yml @@ -39,7 +39,7 @@ jobs: api/pyproject.toml api/poetry.lock - - name: Poetry check + - name: Check Poetry lockfile run: | poetry check -C api --lock poetry show -C api @@ -47,6 +47,9 @@ jobs: - name: Install dependencies run: poetry install -C api --with dev + - name: Check dependencies in pyproject.toml + run: poetry run -C api bash dev/pytest/pytest_artifacts.sh + - name: Run Unit tests run: poetry run -C api bash dev/pytest/pytest_unit_tests.sh diff --git a/api/poetry.lock b/api/poetry.lock index 4b9fdd1526..cf03f8c0f5 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -10595,4 +10595,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "d29d0c4ce384ef94fe65f0a2a145898bd1a117d4fd59c217d15bbb8993f4ce4e" +content-hash = "fd183812f910faf4e840267501c571db5d758ad6eb328d106ba6f79a0322a555" diff --git a/api/pyproject.toml b/api/pyproject.toml index 1c12cc3a36..839df9ea67 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -104,25 +104,23 @@ name = "dify-api" package-mode 
= false ############################################################ -# Main dependencies +# [ Main ] Dependency group ############################################################ [tool.poetry.dependencies] anthropic = "~0.23.1" authlib = "1.3.1" +azure-ai-inference = "~1.0.0b3" +azure-ai-ml = "~1.20.0" azure-identity = "1.16.1" azure-storage-blob = "12.13.0" beautifulsoup4 = "4.12.2" boto3 = "1.35.17" -sagemaker = "2.231.0" bs4 = "~0.0.1" cachetools = "~5.3.0" celery = "~5.3.6" chardet = "~5.1.0" cohere = "~5.2.4" -cos-python-sdk-v5 = "1.9.30" -esdk-obs-python = "3.24.6.1" -bce-python-sdk = "~0.9.23" dashscope = { version = "~1.17.0", extras = ["tokenizer"] } flask = "~3.0.1" flask-compress = "~1.14" @@ -130,7 +128,7 @@ flask-cors = "~4.0.0" flask-login = "~0.6.3" flask-migrate = "~4.0.5" flask-restful = "~0.3.10" -Flask-SQLAlchemy = "~3.1.1" +flask-sqlalchemy = "~3.1.1" gevent = "~23.9.1" gmpy2 = "~2.2.1" google-ai-generativelanguage = "0.6.9" @@ -139,22 +137,22 @@ google-api-python-client = "2.90.0" google-auth = "2.29.0" google-auth-httplib2 = "0.2.0" google-cloud-aiplatform = "1.49.0" -google-cloud-storage = "2.16.0" google-generativeai = "0.8.1" googleapis-common-protos = "1.63.0" gunicorn = "~22.0.0" httpx = { version = "~0.27.0", extras = ["socks"] } huggingface-hub = "~0.16.4" jieba = "0.42.1" -langfuse = "^2.48.0" -langsmith = "^0.1.77" +langfuse = "~2.51.3" +langsmith = "~0.1.77" mailchimp-transactional = "~1.0.50" markdown = "~3.5.1" -novita-client = "^0.5.7" +nomic = "~3.1.2" +novita-client = "~0.5.7" numpy = "~1.26.4" +oci = "~2.135.1" openai = "~1.29.0" openpyxl = "~3.1.5" -oss2 = "2.18.5" pandas = { version = "~2.2.2", extras = ["performance", "excel"] } psycopg2-binary = "~2.9.6" pycryptodome = "3.19.1" @@ -171,7 +169,8 @@ readabilipy = "0.2.0" redis = { version = "~5.0.3", extras = ["hiredis"] } replicate = "~0.22.0" resend = "~0.7.0" -scikit-learn = "^1.5.1" +sagemaker = "2.231.0" +scikit-learn = "~1.5.1" sentry-sdk = { version = "~1.44.1", 
extras = ["flask"] } sqlalchemy = "~2.0.29" tencentcloud-sdk-python-hunyuan = "~3.0.1158" @@ -179,6 +178,8 @@ tiktoken = "~0.7.0" tokenizers = "~0.15.0" transformers = "~4.35.0" unstructured = { version = "~0.10.27", extras = ["docx", "epub", "md", "msg", "ppt", "pptx"] } +validators = "0.21.0" +volcengine-python-sdk = {extras = ["ark"], version = "~1.0.98"} websocket-client = "~1.7.0" werkzeug = "~3.0.1" xinference-client = "0.15.2" @@ -187,32 +188,26 @@ zhipuai = "1.0.7" # Before adding new dependency, consider place it in alphabet order (a-z) and suitable group. ############################################################ +# [ Indirect ] dependency group # Related transparent dependencies with pinned version # required by main implementations ############################################################ -azure-ai-ml = "^1.19.0" -azure-ai-inference = "^1.0.0b3" -volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"} -oci = "^2.133.0" -tos = "^2.7.1" -nomic = "^3.1.2" -validators = "0.21.0" -[tool.poetry.group.indriect.dependencies] +[tool.poetry.group.indirect.dependencies] kaleido = "0.2.1" rank-bm25 = "~0.2.2" safetensors = "~0.4.3" ############################################################ -# Tool dependencies required by tool implementations +# [ Tools ] dependency group ############################################################ - -[tool.poetry.group.tool.dependencies] +[tool.poetry.group.tools.dependencies] arxiv = "2.1.0" cloudscraper = "1.2.71" -matplotlib = "~3.8.2" -newspaper3k = "0.2.8" duckduckgo-search = "~6.3.0" jsonpath-ng = "1.6.1" +matplotlib = "~3.8.2" +newspaper3k = "0.2.8" +nltk = "3.8.1" numexpr = "~2.9.0" opensearch-py = "2.4.0" qrcode = "~7.4.2" @@ -220,11 +215,23 @@ twilio = "~9.0.4" vanna = { version = "0.5.5", extras = ["postgres", "mysql", "clickhouse", "duckdb"] } wikipedia = "1.4.0" yfinance = "~0.2.40" -nltk = "3.8.1" -############################################################ -# VDB dependencies required by vector store 
clients -############################################################ +############################################################ +# [ Storage ] dependency group +# Required for storage clients +############################################################ +[tool.poetry.group.storage.dependencies] +bce-python-sdk = "~0.9.23" +cos-python-sdk-v5 = "1.9.30" +esdk-obs-python = "3.24.6.1" +google-cloud-storage = "2.16.0" +oss2 = "2.18.5" +tos = "~2.7.1" + +############################################################ +# [ VDB ] dependency group +# Required by vector store clients +############################################################ [tool.poetry.group.vdb.dependencies] alibabacloud_gpdb20160503 = "~3.8.0" alibabacloud_tea_openapi = "~0.3.9" @@ -235,18 +242,17 @@ oracledb = "~2.2.1" pgvecto-rs = { version = "~0.2.1", extras = ['sqlalchemy'] } pgvector = "0.2.5" pymilvus = "~2.4.4" +qdrant-client = "1.7.3" tcvectordb = "1.3.2" tidb-vector = "0.0.9" -qdrant-client = "1.7.3" weaviate-client = "~3.21.0" ############################################################ -# Dev dependencies for running tests +# [ Dev ] dependency group +# Required for development and running tests ############################################################ - [tool.poetry.group.dev] optional = true - [tool.poetry.group.dev.dependencies] coverage = "~7.2.4" pytest = "~8.3.2" @@ -255,12 +261,11 @@ pytest-env = "~1.1.3" pytest-mock = "~3.14.0" ############################################################ -# Lint dependencies for code style linting +# [ Lint ] dependency group +# Required for code style linting ############################################################ - [tool.poetry.group.lint] optional = true - [tool.poetry.group.lint.dependencies] dotenv-linter = "~0.5.0" ruff = "~0.6.9" diff --git a/api/tests/artifact_tests/dependencies/__init__.py b/api/tests/artifact_tests/dependencies/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git 
a/api/tests/artifact_tests/dependencies/test_dependencies_sorted.py b/api/tests/artifact_tests/dependencies/test_dependencies_sorted.py
new file mode 100644
index 0000000000..518cee1a3a
--- /dev/null
+++ b/api/tests/artifact_tests/dependencies/test_dependencies_sorted.py
@@ -0,0 +1,101 @@
+from collections import Counter
+from pathlib import Path
+from typing import Any
+
+import toml
+
+# Resolved from this file's location (api/tests/artifact_tests/dependencies/),
+# so the checks do not depend on the directory pytest is started from.
+PYPROJECT_TOML_PATH = Path(__file__).resolve().parents[3] / "pyproject.toml"
+
+ALL_DEPENDENCY_GROUP_NAMES = [
+    # default main group
+    "",
+    # required groups
+    "indirect",
+    "storage",
+    "tools",
+    "vdb",
+    # optional groups
+    "dev",
+    "lint",
+]
+
+
+def load_api_poetry_configs() -> dict[str, Any]:
+    """Return the ``[tool.poetry]`` table of ``api/pyproject.toml``.
+
+    Raises ``KeyError`` if the file has no ``[tool.poetry]`` table.
+    """
+    pyproject_toml = toml.load(str(PYPROJECT_TOML_PATH))
+    return pyproject_toml["tool"]["poetry"]
+
+
+def load_dependency_groups() -> dict[str, dict[str, Any]]:
+    """Map every name in ``ALL_DEPENDENCY_GROUP_NAMES`` to its dependency table.
+
+    The empty name stands for the main ``[tool.poetry.dependencies]`` table.
+    Each value maps a package name to its specification as written in the file
+    (a version string or a table). Raises ``KeyError`` when a listed group
+    is not declared in ``pyproject.toml``.
+    """
+    poetry_configs = load_api_poetry_configs()
+    return {
+        group_name: (poetry_configs["group"][group_name] if group_name else poetry_configs)["dependencies"]
+        for group_name in ALL_DEPENDENCY_GROUP_NAMES
+    }
+
+
+def _section_name(group_name: str) -> str:
+    """Return the ``pyproject.toml`` table name holding the dependencies of *group_name*."""
+    return f"tool.poetry.group.{group_name}.dependencies" if group_name else "tool.poetry.dependencies"
+
+
+def _version_specs(specification: Any) -> list[str]:
+    """Return every version constraint string declared by one dependency specification.
+
+    Handles the plain string form, the table form (which may have no ``version``
+    key, e.g. for git or path sources) and the list-of-tables form.
+    """
+    if isinstance(specification, str):
+        return [specification]
+    if isinstance(specification, list):
+        return [spec for item in specification for spec in _version_specs(item)]
+    version = specification.get("version")
+    return [] if version is None else [version]
+
+
+def test_group_dependencies_sorted():
+    """Every dependency table lists its packages in ascending order."""
+    problems = []
+    for group_name, dependencies in load_dependency_groups().items():
+        dependency_names = list(dependencies)
+        # Keys of a table are unique, so the list is sorted iff no adjacent pair is out of order.
+        for previous, current in zip(dependency_names, dependency_names[1:]):
+            if current < previous:
+                problems.append(f"[{_section_name(group_name)}]: '{current}' is listed after '{previous}'")
+                break
+    assert not problems, "Dependencies are not sorted. Check and fix pyproject.toml file:\n" + "\n".join(problems)
+
+
+def test_group_dependencies_version_operator():
+    """No dependency version constraint starts with the ``^`` operator."""
+    offenders = []
+    for group_name, dependencies in load_dependency_groups().items():
+        for dependency_name, specification in dependencies.items():
+            offenders.extend(
+                f"[{_section_name(group_name)}] {dependency_name} = {version_spec}"
+                for version_spec in _version_specs(specification)
+                if version_spec.startswith("^")
+            )
+    assert not offenders, "'^' is not allowed in dependency version, but found in:\n" + "\n".join(offenders)
+
+
+def test_duplicated_dependency_crossing_groups():
+    """No package is declared in more than one dependency group."""
+    occurrences = Counter()
+    for dependencies in load_dependency_groups().values():
+        # Pass the keys explicitly: Counter.update() on a mapping would add its values as counts.
+        occurrences.update(dependencies.keys())
+    duplicated = sorted(name for name, count in occurrences.items() if count > 1)
+    assert not duplicated, f"Duplicated dependencies crossing groups are found: {duplicated}"
diff --git a/dev/pytest/pytest_artifacts.sh b/dev/pytest/pytest_artifacts.sh
new file mode 100755
index 0000000000..d52acb2273
--- /dev/null
+++ b/dev/pytest/pytest_artifacts.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+set -x
+
+pytest api/tests/artifact_tests/
diff --git a/dev/sync-poetry b/dev/sync-poetry
index 2dd4dd4fc3..23d5d79e90 100755
--- a/dev/sync-poetry
+++ b/dev/sync-poetry
@@ -11,5 +11,8 @@ poetry check -C api --lock
 if [ $? -ne 0 ]; then
   # update poetry.lock
   # refreshing lockfile only without updating locked versions
+  echo "poetry.lock is outdated, refreshing without updating locked versions ..."
   poetry lock -C api --no-update
+else
+  echo "poetry.lock is ready."
 fi