From 7c70eb87bc4d281288b140ed6c6c2a8cb92b1ff5 Mon Sep 17 00:00:00 2001 From: 8bitpd <51897400+lpdink@users.noreply.github.com> Date: Tue, 9 Jul 2024 13:32:04 +0800 Subject: [PATCH] feat: support AnalyticDB vector store (#5586) Co-authored-by: xiaozeyu --- api/.env.example | 10 + api/commands.py | 8 + api/configs/middleware/__init__.py | 2 + .../middleware/vdb/analyticdb_config.py | 44 +++ api/controllers/console/datasets/datasets.py | 4 +- .../rag/datasource/vdb/analyticdb/__init__.py | 0 .../vdb/analyticdb/analyticdb_vector.py | 332 ++++++++++++++++++ api/core/rag/datasource/vdb/vector_factory.py | 3 + api/core/rag/datasource/vdb/vector_type.py | 1 + api/poetry.lock | 194 +++++++++- api/pyproject.toml | 2 + .../vdb/analyticdb/__init__.py | 0 .../vdb/analyticdb/test_analyticdb.py | 31 ++ docker/docker-compose.yaml | 9 + 14 files changed, 637 insertions(+), 3 deletions(-) create mode 100644 api/configs/middleware/vdb/analyticdb_config.py create mode 100644 api/core/rag/datasource/vdb/analyticdb/__init__.py create mode 100644 api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py create mode 100644 api/tests/integration_tests/vdb/analyticdb/__init__.py create mode 100644 api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py diff --git a/api/.env.example b/api/.env.example index 573c8bf90c..c28d25a454 100644 --- a/api/.env.example +++ b/api/.env.example @@ -151,6 +151,16 @@ CHROMA_DATABASE=default_database CHROMA_AUTH_PROVIDER=chromadb.auth.token_authn.TokenAuthenticationServerProvider CHROMA_AUTH_CREDENTIALS=difyai123456 +# AnalyticDB configuration +ANALYTICDB_KEY_ID=your-ak +ANALYTICDB_KEY_SECRET=your-sk +ANALYTICDB_REGION_ID=cn-hangzhou +ANALYTICDB_INSTANCE_ID=gp-ab123456 +ANALYTICDB_ACCOUNT=testaccount +ANALYTICDB_PASSWORD=testpassword +ANALYTICDB_NAMESPACE=dify +ANALYTICDB_NAMESPACE_PASSWORD=difypassword + # OpenSearch configuration OPENSEARCH_HOST=127.0.0.1 OPENSEARCH_PORT=9200 diff --git a/api/commands.py b/api/commands.py index cc49824b4f..6719539cc8 100644 --- a/api/commands.py +++ b/api/commands.py @@ -337,6 +337,14 @@ def migrate_knowledge_vector_database(): "vector_store": {"class_prefix": collection_name} } dataset.index_struct = json.dumps(index_struct_dict) + elif vector_type == VectorType.ANALYTICDB: + dataset_id = dataset.id + collection_name = Dataset.gen_collection_name_by_id(dataset_id) + index_struct_dict = { + "type": VectorType.ANALYTICDB, + "vector_store": {"class_prefix": collection_name} + } + dataset.index_struct = json.dumps(index_struct_dict) else: raise ValueError(f"Vector store {vector_type} is not supported.") diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index d8a2fe683a..067bcd7af4 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -10,6 +10,7 @@ from configs.middleware.storage.azure_blob_storage_config import AzureBlobStorag from configs.middleware.storage.google_cloud_storage_config import GoogleCloudStorageConfig from configs.middleware.storage.oci_storage_config import OCIStorageConfig from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig +from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig from configs.middleware.vdb.chroma_config import ChromaConfig from configs.middleware.vdb.milvus_config import MilvusConfig from configs.middleware.vdb.opensearch_config import OpenSearchConfig @@ -183,6 +184,7 @@ class MiddlewareConfig( # configs of vdb and vdb providers VectorStoreConfig, + AnalyticdbConfig, 
ChromaConfig,
     MilvusConfig,
     OpenSearchConfig,
diff --git a/api/configs/middleware/vdb/analyticdb_config.py b/api/configs/middleware/vdb/analyticdb_config.py
new file mode 100644
index 0000000000..db2899265e
--- /dev/null
+++ b/api/configs/middleware/vdb/analyticdb_config.py
@@ -0,0 +1,44 @@
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+
+class AnalyticdbConfig(BaseModel):
+    """
+    Configuration for connecting to AnalyticDB.
+    Refer to the following documentation for details on obtaining credentials:
+    https://www.alibabacloud.com/help/en/analyticdb-for-postgresql/getting-started/create-an-instance-instances-with-vector-engine-optimization-enabled
+    """
+
+    ANALYTICDB_KEY_ID: Optional[str] = Field(
+        default=None,
+        description="The Access Key ID provided by Alibaba Cloud for authentication."
+    )
+    ANALYTICDB_KEY_SECRET: Optional[str] = Field(
+        default=None,
+        description="The Secret Access Key corresponding to the Access Key ID for secure access."
+    )
+    ANALYTICDB_REGION_ID: Optional[str] = Field(
+        default=None,
+        description="The region where the AnalyticDB instance is deployed (e.g., 'cn-hangzhou')."
+    )
+    ANALYTICDB_INSTANCE_ID: Optional[str] = Field(
+        default=None,
+        description="The unique identifier of the AnalyticDB instance to connect to (e.g., 'gp-ab123456')."
+    )
+    ANALYTICDB_ACCOUNT: Optional[str] = Field(
+        default=None,
+        description="The account name used to log in to the AnalyticDB instance."
+    )
+    ANALYTICDB_PASSWORD: Optional[str] = Field(
+        default=None,
+        description="The password associated with the AnalyticDB account for authentication."
+    )
+    ANALYTICDB_NAMESPACE: Optional[str] = Field(
+        default=None,
+        description="The namespace within AnalyticDB for schema isolation."
+    )
+    ANALYTICDB_NAMESPACE_PASSWORD: Optional[str] = Field(
+        default=None,
+        description="The password for accessing the specified namespace within the AnalyticDB instance."
+    )
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
index fdd61b0a0c..c1f29d5024 100644
--- a/api/controllers/console/datasets/datasets.py
+++ b/api/controllers/console/datasets/datasets.py
@@ -515,7 +515,7 @@ class DatasetRetrievalSettingApi(Resource):
                         RetrievalMethod.SEMANTIC_SEARCH
                     ]
                 }
-            case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
+            case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH | VectorType.ANALYTICDB:
                 return {
                     'retrieval_method': [
                         RetrievalMethod.SEMANTIC_SEARCH,
@@ -539,7 +539,7 @@ class DatasetRetrievalSettingMockApi(Resource):
                         RetrievalMethod.SEMANTIC_SEARCH
                     ]
                 }
-            case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH:
+            case VectorType.QDRANT | VectorType.WEAVIATE | VectorType.OPENSEARCH | VectorType.ANALYTICDB:
                 return {
                     'retrieval_method': [
                         RetrievalMethod.SEMANTIC_SEARCH,
diff --git a/api/core/rag/datasource/vdb/analyticdb/__init__.py b/api/core/rag/datasource/vdb/analyticdb/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
new file mode 100644
index 0000000000..d7a5dd5dcc
--- /dev/null
+++ b/api/core/rag/datasource/vdb/analyticdb/analyticdb_vector.py
@@ -0,0 +1,332 @@
+import json
+from typing import Any, Optional
+
+from flask import current_app
+from pydantic import BaseModel
+
+from core.rag.datasource.entity.embedding import Embeddings
+from core.rag.datasource.vdb.vector_base import BaseVector
+from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
+from core.rag.datasource.vdb.vector_type import VectorType
+from core.rag.models.document import Document
+from extensions.ext_redis import redis_client
+from models.dataset import Dataset
+
+_import_err_msg = (
+    "`alibabacloud_gpdb20160503` and `alibabacloud_tea_openapi` packages not found, "
+    "please run `pip install alibabacloud_gpdb20160503 alibabacloud_tea_openapi`"
+)
+
+
+class AnalyticdbConfig(BaseModel):
+    access_key_id: str
+    access_key_secret: str
+    region_id: str
+    instance_id: str
+    account: str
+    account_password: str
+    namespace: str = "dify"
+    namespace_password: Optional[str] = None
+    metrics: str = "cosine"
+    read_timeout: int = 60000
+
+    def to_analyticdb_client_params(self):
+        return {
+            "access_key_id": self.access_key_id,
+            "access_key_secret": self.access_key_secret,
+            "region_id": self.region_id,
+            "read_timeout": self.read_timeout,
+        }
+
+
+class AnalyticdbVector(BaseVector):
+    _instance = None
+    _init = False
+
+    def __new__(cls, *args, **kwargs):
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __init__(self, collection_name: str, config: AnalyticdbConfig):
+        # collection_name must be updated every time
+        self._collection_name = collection_name.lower()
+        if AnalyticdbVector._init:
+            return
+        try:
+            from alibabacloud_gpdb20160503.client import Client
+            from alibabacloud_tea_openapi import models as open_api_models
+        except ImportError:
+            raise ImportError(_import_err_msg)
+        self.config = config
+        self._client_config = open_api_models.Config(
+            user_agent="dify", **config.to_analyticdb_client_params()
+        )
+        self._client = Client(self._client_config)
+        self._initialize()
+        AnalyticdbVector._init = True
+
+    def _initialize(self) -> None:
+        self._initialize_vector_database()
+        self._create_namespace_if_not_exists()
+
+    def _initialize_vector_database(self) -> None:
+        from alibabacloud_gpdb20160503
import models as gpdb_20160503_models + request = gpdb_20160503_models.InitVectorDatabaseRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + manager_account=self.config.account, + manager_account_password=self.config.account_password, + ) + self._client.init_vector_database(request) + + def _create_namespace_if_not_exists(self) -> None: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + from Tea.exceptions import TeaException + try: + request = gpdb_20160503_models.DescribeNamespaceRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + manager_account=self.config.account, + manager_account_password=self.config.account_password, + ) + self._client.describe_namespace(request) + except TeaException as e: + if e.statusCode == 404: + request = gpdb_20160503_models.CreateNamespaceRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + manager_account=self.config.account, + manager_account_password=self.config.account_password, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + ) + self._client.create_namespace(request) + else: + raise ValueError( + f"failed to create namespace {self.config.namespace}: {e}" + ) + + def _create_collection_if_not_exists(self, embedding_dimension: int): + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + from Tea.exceptions import TeaException + cache_key = f"vector_indexing_{self._collection_name}" + lock_name = f"{cache_key}_lock" + with redis_client.lock(lock_name, timeout=20): + collection_exist_cache_key = f"vector_indexing_{self._collection_name}" + if redis_client.get(collection_exist_cache_key): + return + try: + request = gpdb_20160503_models.DescribeCollectionRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + ) + self._client.describe_collection(request) + except TeaException as e: + if e.statusCode == 404: + metadata = '{"ref_doc_id":"text","page_content":"text","metadata_":"jsonb"}' + full_text_retrieval_fields = "page_content" + request = gpdb_20160503_models.CreateCollectionRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + manager_account=self.config.account, + manager_account_password=self.config.account_password, + namespace=self.config.namespace, + collection=self._collection_name, + dimension=embedding_dimension, + metrics=self.config.metrics, + metadata=metadata, + full_text_retrieval_fields=full_text_retrieval_fields, + ) + self._client.create_collection(request) + else: + raise ValueError( + f"failed to create collection {self._collection_name}: {e}" + ) + redis_client.set(collection_exist_cache_key, 1, ex=3600) + + def get_type(self) -> str: + return VectorType.ANALYTICDB + + def create(self, texts: list[Document], embeddings: list[list[float]], **kwargs): + dimension = len(embeddings[0]) + self._create_collection_if_not_exists(dimension) + self.add_texts(texts, embeddings) + + def add_texts( + self, documents: list[Document], embeddings: list[list[float]], **kwargs + ): + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + rows: list[gpdb_20160503_models.UpsertCollectionDataRequestRows] = [] + for doc, embedding in zip(documents, embeddings, strict=True): + metadata = { + "ref_doc_id": doc.metadata["doc_id"], + 
"page_content": doc.page_content, + "metadata_": json.dumps(doc.metadata), + } + rows.append( + gpdb_20160503_models.UpsertCollectionDataRequestRows( + vector=embedding, + metadata=metadata, + ) + ) + request = gpdb_20160503_models.UpsertCollectionDataRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + rows=rows, + ) + self._client.upsert_collection_data(request) + + def text_exists(self, id: str) -> bool: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + request = gpdb_20160503_models.QueryCollectionDataRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + metrics=self.config.metrics, + include_values=True, + vector=None, + content=None, + top_k=1, + filter=f"ref_doc_id='{id}'" + ) + response = self._client.query_collection_data(request) + return len(response.body.matches.match) > 0 + + def delete_by_ids(self, ids: list[str]) -> None: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + ids_str = ",".join(f"'{id}'" for id in ids) + ids_str = f"({ids_str})" + request = gpdb_20160503_models.DeleteCollectionDataRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + collection_data=None, + collection_data_filter=f"ref_doc_id IN {ids_str}", + ) + self._client.delete_collection_data(request) + + def delete_by_metadata_field(self, key: str, value: str) -> None: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + request = gpdb_20160503_models.DeleteCollectionDataRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + collection_data=None, + collection_data_filter=f"metadata_ ->> '{key}' = '{value}'", + ) + self._client.delete_collection_data(request) + + def search_by_vector( + self, query_vector: list[float], **kwargs: Any + ) -> list[Document]: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + score_threshold = ( + kwargs.get("score_threshold", 0.0) + if kwargs.get("score_threshold", 0.0) + else 0.0 + ) + request = gpdb_20160503_models.QueryCollectionDataRequest( + dbinstance_id=self.config.instance_id, + region_id=self.config.region_id, + namespace=self.config.namespace, + namespace_password=self.config.namespace_password, + collection=self._collection_name, + include_values=kwargs.pop("include_values", True), + metrics=self.config.metrics, + vector=query_vector, + content=None, + top_k=kwargs.get("top_k", 4), + filter=None, + ) + response = self._client.query_collection_data(request) + documents = [] + for match in response.body.matches.match: + if match.score > score_threshold: + doc = Document( + page_content=match.metadata.get("page_content"), + metadata=json.loads(match.metadata.get("metadata_")), + ) + documents.append(doc) + return documents + + def search_by_full_text(self, query: str, **kwargs: Any) -> list[Document]: + from alibabacloud_gpdb20160503 import models as gpdb_20160503_models + score_threshold = ( + kwargs.get("score_threshold", 0.0) + if kwargs.get("score_threshold", 
+        request = gpdb_20160503_models.QueryCollectionDataRequest(
+            dbinstance_id=self.config.instance_id,
+            region_id=self.config.region_id,
+            namespace=self.config.namespace,
+            namespace_password=self.config.namespace_password,
+            collection=self._collection_name,
+            include_values=kwargs.pop("include_values", True),
+            metrics=self.config.metrics,
+            vector=None,
+            content=query,
+            top_k=kwargs.get("top_k", 4),
+            filter=None,
+        )
+        response = self._client.query_collection_data(request)
+        documents = []
+        for match in response.body.matches.match:
+            if match.score > score_threshold:
+                doc = Document(
+                    page_content=match.metadata.get("page_content"),
+                    metadata=json.loads(match.metadata.get("metadata_")),
+                )
+                documents.append(doc)
+        return documents
+
+    def delete(self) -> None:
+        from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
+        request = gpdb_20160503_models.DeleteCollectionRequest(
+            collection=self._collection_name,
+            dbinstance_id=self.config.instance_id,
+            namespace=self.config.namespace,
+            namespace_password=self.config.namespace_password,
+            region_id=self.config.region_id,
+        )
+        self._client.delete_collection(request)
+
+
+class AnalyticdbVectorFactory(AbstractVectorFactory):
+    def init_vector(self, dataset: Dataset, attributes: list, embeddings: Embeddings):
+        if dataset.index_struct_dict:
+            class_prefix: str = dataset.index_struct_dict["vector_store"]["class_prefix"]
+            collection_name = class_prefix.lower()
+        else:
+            dataset_id = dataset.id
+            collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
+            dataset.index_struct = json.dumps(
+                self.gen_index_struct_dict(VectorType.ANALYTICDB, collection_name)
+            )
+        config = current_app.config
+        return AnalyticdbVector(
+            collection_name,
+            AnalyticdbConfig(
+                access_key_id=config.get("ANALYTICDB_KEY_ID"),
+                access_key_secret=config.get("ANALYTICDB_KEY_SECRET"),
+                region_id=config.get("ANALYTICDB_REGION_ID"),
+                instance_id=config.get("ANALYTICDB_INSTANCE_ID"),
+                account=config.get("ANALYTICDB_ACCOUNT"),
+                account_password=config.get("ANALYTICDB_PASSWORD"),
+                namespace=config.get("ANALYTICDB_NAMESPACE"),
+                namespace_password=config.get("ANALYTICDB_NAMESPACE_PASSWORD"),
+            ),
+        )
\ No newline at end of file
diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py
index 719e2b9a23..b7733029f7 100644
--- a/api/core/rag/datasource/vdb/vector_factory.py
+++ b/api/core/rag/datasource/vdb/vector_factory.py
@@ -84,6 +84,9 @@ class Vector:
             case VectorType.OPENSEARCH:
                 from core.rag.datasource.vdb.opensearch.opensearch_vector import OpenSearchVectorFactory
                 return OpenSearchVectorFactory
+            case VectorType.ANALYTICDB:
+                from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbVectorFactory
+                return AnalyticdbVectorFactory
             case _:
                 raise ValueError(f"Vector store {vector_type} is not supported.")

diff --git a/api/core/rag/datasource/vdb/vector_type.py b/api/core/rag/datasource/vdb/vector_type.py
index dbd5afcb3e..32c8713fda 100644
--- a/api/core/rag/datasource/vdb/vector_type.py
+++ b/api/core/rag/datasource/vdb/vector_type.py
@@ -2,6 +2,7 @@ from enum import Enum


 class VectorType(str, Enum):
+    ANALYTICDB = 'analyticdb'
     CHROMA = 'chroma'
     MILVUS = 'milvus'
     PGVECTOR = 'pgvector'
diff --git a/api/poetry.lock b/api/poetry.lock
index f11ba9a3a4..e0e67bd78f 100644
--- a/api/poetry.lock
+++ b/api/poetry.lock
@@ -143,6 +143,198 @@ typing-extensions = ">=4"

 [package.extras]
 tz = ["backports.zoneinfo"]

+[[package]]
+name = 
"alibabacloud-credentials" +version = "0.3.4" +description = "The alibabacloud credentials module of alibabaCloud Python SDK." +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_credentials-0.3.4.tar.gz", hash = "sha256:c15a34fe782c318d4cf24cb041a0385ac4ccd2548e524e5d7fe1cff56a9a6acc"}, +] + +[package.dependencies] +alibabacloud-tea = "*" + +[[package]] +name = "alibabacloud-endpoint-util" +version = "0.0.3" +description = "The endpoint-util module of alibabaCloud Python SDK." +optional = false +python-versions = "*" +files = [ + {file = "alibabacloud_endpoint_util-0.0.3.tar.gz", hash = "sha256:8c0efb76fdcc3af4ca716ef24bbce770201a3f83f98c0afcf81655f684b9c7d2"}, +] + +[package.dependencies] +alibabacloud-tea = ">=0.0.1" + +[[package]] +name = "alibabacloud-gateway-spi" +version = "0.0.1" +description = "Alibaba Cloud Gateway SPI SDK Library for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_gateway_spi-0.0.1.tar.gz", hash = "sha256:1b259855708afc3c04d8711d8530c63f7645e1edc0cf97e2fd15461b08e11c30"}, +] + +[package.dependencies] +alibabacloud_credentials = ">=0.2.0,<1.0.0" + +[[package]] +name = "alibabacloud-gpdb20160503" +version = "3.8.2" +description = "Alibaba Cloud AnalyticDB for PostgreSQL (20160503) SDK Library for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_gpdb20160503-3.8.2-py3-none-any.whl", hash = "sha256:081977cdd4174c786b303f3c5651026297d84baa0256386be8215ee997cd5c75"}, + {file = "alibabacloud_gpdb20160503-3.8.2.tar.gz", hash = "sha256:c964ca721a05e440a1065e33aa74d456eafe2c8b17f6e0d960d5bb44dfe4bd9c"}, +] + +[package.dependencies] +alibabacloud-endpoint-util = ">=0.0.3,<1.0.0" +alibabacloud-openapi-util = ">=0.2.1,<1.0.0" +alibabacloud-openplatform20191219 = ">=2.0.0,<3.0.0" +alibabacloud-oss-sdk = ">=0.1.0,<1.0.0" +alibabacloud-oss-util = ">=0.0.5,<1.0.0" +alibabacloud-tea-fileform = ">=0.0.3,<1.0.0" +alibabacloud-tea-openapi = ">=0.3.10,<1.0.0" +alibabacloud-tea-util = ">=0.3.12,<1.0.0" + +[[package]] +name = "alibabacloud-openapi-util" +version = "0.2.2" +description = "Aliyun Tea OpenApi Library for Python" +optional = false +python-versions = "*" +files = [ + {file = "alibabacloud_openapi_util-0.2.2.tar.gz", hash = "sha256:ebbc3906f554cb4bf8f513e43e8a33e8b6a3d4a0ef13617a0e14c3dda8ef52a8"}, +] + +[package.dependencies] +alibabacloud_tea_util = ">=0.0.2" +cryptography = ">=3.0.0" + +[[package]] +name = "alibabacloud-openplatform20191219" +version = "2.0.0" +description = "Alibaba Cloud OpenPlatform (20191219) SDK Library for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_openplatform20191219-2.0.0-py3-none-any.whl", hash = "sha256:873821c45bca72a6c6ec7a906c9cb21554c122e88893bbac3986934dab30dd36"}, + {file = "alibabacloud_openplatform20191219-2.0.0.tar.gz", hash = "sha256:e67f4c337b7542538746592c6a474bd4ae3a9edccdf62e11a32ca61fad3c9020"}, +] + +[package.dependencies] +alibabacloud-endpoint-util = ">=0.0.3,<1.0.0" +alibabacloud-openapi-util = ">=0.1.6,<1.0.0" +alibabacloud-tea-openapi = ">=0.3.3,<1.0.0" +alibabacloud-tea-util = ">=0.3.6,<1.0.0" + +[[package]] +name = "alibabacloud-oss-sdk" +version = "0.1.0" +description = "Aliyun Tea OSS SDK Library for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_oss_sdk-0.1.0.tar.gz", hash = "sha256:cc5ce36044bae758047fccb56c0cb6204cbc362d18cc3dd4ceac54c8c0897b8b"}, +] + +[package.dependencies] +alibabacloud_credentials = ">=0.1.2,<1.0.0" 
+alibabacloud_oss_util = ">=0.0.5,<1.0.0" +alibabacloud_tea_fileform = ">=0.0.3,<1.0.0" +alibabacloud_tea_util = ">=0.3.1,<1.0.0" +alibabacloud_tea_xml = ">=0.0.2,<1.0.0" + +[[package]] +name = "alibabacloud-oss-util" +version = "0.0.6" +description = "The oss util module of alibabaCloud Python SDK." +optional = false +python-versions = "*" +files = [ + {file = "alibabacloud_oss_util-0.0.6.tar.gz", hash = "sha256:d3ecec36632434bd509a113e8cf327dc23e830ac8d9dd6949926f4e334c8b5d6"}, +] + +[package.dependencies] +alibabacloud-tea = "*" + +[[package]] +name = "alibabacloud-tea" +version = "0.3.9" +description = "The tea module of alibabaCloud Python SDK." +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud-tea-0.3.9.tar.gz", hash = "sha256:a9689770003fa9313d1995812f9fe36a2be315e5cdfc8d58de0d96808219ced9"}, + {file = "alibabacloud_tea-0.3.9-py3-none-any.whl", hash = "sha256:402fd2a92e6729f228d8c0300b182f80019edce19d83afa497aeb15fd7947f9a"}, +] + +[package.dependencies] +aiohttp = ">=3.7.0,<4.0.0" +requests = ">=2.21.0,<3.0.0" + +[[package]] +name = "alibabacloud-tea-fileform" +version = "0.0.5" +description = "The tea-fileform module of alibabaCloud Python SDK." +optional = false +python-versions = "*" +files = [ + {file = "alibabacloud_tea_fileform-0.0.5.tar.gz", hash = "sha256:fd00a8c9d85e785a7655059e9651f9e91784678881831f60589172387b968ee8"}, +] + +[package.dependencies] +alibabacloud-tea = ">=0.0.1" + +[[package]] +name = "alibabacloud-tea-openapi" +version = "0.3.10" +description = "Alibaba Cloud openapi SDK Library for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_tea_openapi-0.3.10.tar.gz", hash = "sha256:46e9c54ea857346306cd5c628dc33479349b559179ed2fdb2251dbe6ec9a1cf1"}, +] + +[package.dependencies] +alibabacloud_credentials = ">=0.3.1,<1.0.0" +alibabacloud_gateway_spi = ">=0.0.1,<1.0.0" +alibabacloud_openapi_util = ">=0.2.1,<1.0.0" +alibabacloud_tea_util = ">=0.3.12,<1.0.0" +alibabacloud_tea_xml = ">=0.0.2,<1.0.0" + +[[package]] +name = "alibabacloud-tea-util" +version = "0.3.12" +description = "The tea-util module of alibabaCloud Python SDK." +optional = false +python-versions = ">=3.6" +files = [ + {file = "alibabacloud_tea_util-0.3.12.tar.gz", hash = "sha256:72a2f5a046e5b977ade4202eb4f65b3d70ad707a548e29aacd4a572c2d18d06b"}, +] + +[package.dependencies] +alibabacloud-tea = ">=0.3.3" + +[[package]] +name = "alibabacloud-tea-xml" +version = "0.0.2" +description = "The tea-xml module of alibabaCloud Python SDK." 
+optional = false +python-versions = "*" +files = [ + {file = "alibabacloud_tea_xml-0.0.2.tar.gz", hash = "sha256:f0135e8148fd7d9c1f029db161863f37f144f837c280cba16c2edeb2f9c549d8"}, +] + +[package.dependencies] +alibabacloud-tea = ">=0.0.1" + [[package]] name = "aliyun-python-sdk-core" version = "2.15.1" @@ -9000,4 +9192,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "fdba75f08df361b7b0d89d375062fa9208a68d2a59597071c6e382285f6fccff" +content-hash = "08572878f911d65a3c4796a7fff2a6d4c9a71dd3fe57387e225436607c179068" diff --git a/api/pyproject.toml b/api/pyproject.toml index a5d226f2ce..60740a3a79 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -209,6 +209,8 @@ tcvectordb = "1.3.2" tidb-vector = "0.0.9" qdrant-client = "1.7.3" weaviate-client = "~3.21.0" +alibabacloud_gpdb20160503 = "~3.8.0" +alibabacloud_tea_openapi = "~0.3.9" ############################################################ # Transparent dependencies required by main dependencies diff --git a/api/tests/integration_tests/vdb/analyticdb/__init__.py b/api/tests/integration_tests/vdb/analyticdb/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py b/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py new file mode 100644 index 0000000000..d6067af73b --- /dev/null +++ b/api/tests/integration_tests/vdb/analyticdb/test_analyticdb.py @@ -0,0 +1,31 @@ +from core.rag.datasource.vdb.analyticdb.analyticdb_vector import AnalyticdbConfig, AnalyticdbVector +from tests.integration_tests.vdb.test_vector_store import AbstractVectorTest, setup_mock_redis + + +class AnalyticdbVectorTest(AbstractVectorTest): + def __init__(self): + super().__init__() + # Analyticdb requires collection_name length less than 60. + # it's ok for normal usage. 
+        self.collection_name = self.collection_name.replace("_test", "")
+        self.vector = AnalyticdbVector(
+            collection_name=self.collection_name,
+            config=AnalyticdbConfig(
+                access_key_id="test_key_id",
+                access_key_secret="test_key_secret",
+                region_id="test_region",
+                instance_id="test_id",
+                account="test_account",
+                account_password="test_passwd",
+                namespace="difytest_namespace",
+                namespace_password="test_passwd",
+            ),
+        )
+
+    def run_all_tests(self):
+        self.vector.delete()
+        return super().run_all_tests()
+
+def test_analyticdb_vector(setup_mock_redis):
+    AnalyticdbVectorTest().run_all_tests()
\ No newline at end of file
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 3d26ae2ad7..90768e2f39 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -108,6 +108,15 @@ x-shared-env: &shared-api-worker-env
   CHROMA_DATABASE: ${CHROMA_DATABASE:-default_database}
   CHROMA_AUTH_PROVIDER: ${CHROMA_AUTH_PROVIDER:-chromadb.auth.token_authn.TokenAuthClientProvider}
   CHROMA_AUTH_CREDENTIALS: ${CHROMA_AUTH_CREDENTIALS:-}
+  # AnalyticDB configuration
+  ANALYTICDB_KEY_ID: ${ANALYTICDB_KEY_ID:-}
+  ANALYTICDB_KEY_SECRET: ${ANALYTICDB_KEY_SECRET:-}
+  ANALYTICDB_REGION_ID: ${ANALYTICDB_REGION_ID:-}
+  ANALYTICDB_INSTANCE_ID: ${ANALYTICDB_INSTANCE_ID:-}
+  ANALYTICDB_ACCOUNT: ${ANALYTICDB_ACCOUNT:-}
+  ANALYTICDB_PASSWORD: ${ANALYTICDB_PASSWORD:-}
+  ANALYTICDB_NAMESPACE: ${ANALYTICDB_NAMESPACE:-dify}
+  ANALYTICDB_NAMESPACE_PASSWORD: ${ANALYTICDB_NAMESPACE_PASSWORD:-}
   OPENSEARCH_HOST: ${OPENSEARCH_HOST:-opensearch}
   OPENSEARCH_PORT: ${OPENSEARCH_PORT:-9200}
   OPENSEARCH_USER: ${OPENSEARCH_USER:-admin}
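
Reviewer note: the snippet below is a minimal usage sketch, not part of the patch. It assumes the patch is applied, a Dify app context with Redis is available (AnalyticdbVector takes a Redis lock while creating a collection), the placeholder credentials are replaced with values for a real AnalyticDB instance, and the 4-dimensional embeddings stand in for real model output.

from core.rag.datasource.vdb.analyticdb.analyticdb_vector import (
    AnalyticdbConfig,
    AnalyticdbVector,
)
from core.rag.models.document import Document

config = AnalyticdbConfig(
    access_key_id="your-ak",        # placeholder credential
    access_key_secret="your-sk",    # placeholder credential
    region_id="cn-hangzhou",
    instance_id="gp-ab123456",      # placeholder instance id
    account="testaccount",
    account_password="testpassword",
    namespace="dify",
    namespace_password="difypassword",
)

# The constructor builds the Alibaba Cloud client and initializes the
# vector database and namespace on first use.
vector = AnalyticdbVector("dify_example_collection", config)

docs = [Document(page_content="hello analyticdb", metadata={"doc_id": "doc-1"})]
embeddings = [[0.1, 0.2, 0.3, 0.4]]  # one embedding per document

# create() sizes the collection from the first embedding, then upserts the
# rows; add_texts() handles subsequent batches.
vector.create(docs, embeddings)

# Semantic search by vector, plus the full-text path this patch adds.
matches = vector.search_by_vector(embeddings[0], top_k=1, score_threshold=0.0)
keyword_matches = vector.search_by_full_text("analyticdb", top_k=1)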