From c8df92d0eb9afa5638e2a65b1b8f2d134959b2aa Mon Sep 17 00:00:00 2001 From: ice yao Date: Tue, 10 Sep 2024 09:19:47 +0800 Subject: [PATCH] add volcengine tos storage (#8164) --- api/.env.example | 9 +++- api/configs/middleware/__init__.py | 2 + .../storage/volcengine_tos_storage_config.py | 34 +++++++++++++ api/extensions/ext_storage.py | 3 ++ api/extensions/storage/volcengine_storage.py | 48 +++++++++++++++++++ api/poetry.lock | 19 +++++++- api/pyproject.toml | 1 + docker/.env.example | 12 +++++ docker/docker-compose.yaml | 5 ++ 9 files changed, 131 insertions(+), 2 deletions(-) create mode 100644 api/configs/middleware/storage/volcengine_tos_storage_config.py create mode 100644 api/extensions/storage/volcengine_storage.py diff --git a/api/.env.example b/api/.env.example index 2320a4042d..a3db406aea 100644 --- a/api/.env.example +++ b/api/.env.example @@ -39,7 +39,7 @@ DB_DATABASE=dify # Storage configuration # use for store upload files, private keys... -# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs +# storage type: local, s3, azure-blob, google-storage, tencent-cos, huawei-obs, volcengine-tos STORAGE_TYPE=local STORAGE_LOCAL_PATH=storage S3_USE_AWS_MANAGED_IAM=false @@ -86,6 +86,13 @@ OCI_ACCESS_KEY=your-access-key OCI_SECRET_KEY=your-secret-key OCI_REGION=your-region +# Volcengine tos Storage configuration +VOLCENGINE_TOS_ENDPOINT=your-endpoint +VOLCENGINE_TOS_BUCKET_NAME=your-bucket-name +VOLCENGINE_TOS_ACCESS_KEY=your-access-key +VOLCENGINE_TOS_SECRET_KEY=your-secret-key +VOLCENGINE_TOS_REGION=your-region + # CORS configuration WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,* CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,* diff --git a/api/configs/middleware/__init__.py b/api/configs/middleware/__init__.py index 18c5ba8df3..e017c2c5b8 100644 --- a/api/configs/middleware/__init__.py +++ b/api/configs/middleware/__init__.py @@ -12,6 +12,7 @@ from configs.middleware.storage.google_cloud_storage_config import GoogleCloudSt from configs.middleware.storage.huawei_obs_storage_config import HuaweiCloudOBSStorageConfig from configs.middleware.storage.oci_storage_config import OCIStorageConfig from configs.middleware.storage.tencent_cos_storage_config import TencentCloudCOSStorageConfig +from configs.middleware.storage.volcengine_tos_storage_config import VolcengineTOSStorageConfig from configs.middleware.vdb.analyticdb_config import AnalyticdbConfig from configs.middleware.vdb.chroma_config import ChromaConfig from configs.middleware.vdb.elasticsearch_config import ElasticsearchConfig @@ -201,6 +202,7 @@ class MiddlewareConfig( GoogleCloudStorageConfig, TencentCloudCOSStorageConfig, HuaweiCloudOBSStorageConfig, + VolcengineTOSStorageConfig, S3StorageConfig, OCIStorageConfig, # configs of vdb and vdb providers diff --git a/api/configs/middleware/storage/volcengine_tos_storage_config.py b/api/configs/middleware/storage/volcengine_tos_storage_config.py new file mode 100644 index 0000000000..a0e09a3cc7 --- /dev/null +++ b/api/configs/middleware/storage/volcengine_tos_storage_config.py @@ -0,0 +1,34 @@ +from typing import Optional + +from pydantic import BaseModel, Field + + +class VolcengineTOSStorageConfig(BaseModel): + """ + Volcengine tos storage configs + """ + + VOLCENGINE_TOS_BUCKET_NAME: Optional[str] = Field( + description="Volcengine TOS Bucket Name", + default=None, + ) + + VOLCENGINE_TOS_ACCESS_KEY: Optional[str] = Field( + description="Volcengine TOS Access Key", + default=None, + ) + + VOLCENGINE_TOS_SECRET_KEY: Optional[str] = Field( + description="Volcengine TOS Secret Key", + default=None, + ) + + VOLCENGINE_TOS_ENDPOINT: Optional[str] = Field( + description="Volcengine TOS Endpoint URL", + default=None, + ) + + VOLCENGINE_TOS_REGION: Optional[str] = Field( + description="Volcengine TOS Region", + default=None, + ) diff --git a/api/extensions/ext_storage.py b/api/extensions/ext_storage.py index 6f572e3e81..5ce18b7292 100644 --- a/api/extensions/ext_storage.py +++ b/api/extensions/ext_storage.py @@ -11,6 +11,7 @@ from extensions.storage.local_storage import LocalStorage from extensions.storage.oci_storage import OCIStorage from extensions.storage.s3_storage import S3Storage from extensions.storage.tencent_storage import TencentStorage +from extensions.storage.volcengine_storage import VolcengineStorage class Storage: @@ -33,6 +34,8 @@ class Storage: self.storage_runner = OCIStorage(app=app) elif storage_type == "huawei-obs": self.storage_runner = HuaweiStorage(app=app) + elif storage_type == "volcengine-tos": + self.storage_runner = VolcengineStorage(app=app) else: self.storage_runner = LocalStorage(app=app) diff --git a/api/extensions/storage/volcengine_storage.py b/api/extensions/storage/volcengine_storage.py new file mode 100644 index 0000000000..f74ad2ee6d --- /dev/null +++ b/api/extensions/storage/volcengine_storage.py @@ -0,0 +1,48 @@ +from collections.abc import Generator + +import tos +from flask import Flask + +from extensions.storage.base_storage import BaseStorage + + +class VolcengineStorage(BaseStorage): + """Implementation for Volcengine TOS storage.""" + + def __init__(self, app: Flask): + super().__init__(app) + app_config = self.app.config + self.bucket_name = app_config.get("VOLCENGINE_TOS_BUCKET_NAME") + self.client = tos.TosClientV2( + ak=app_config.get("VOLCENGINE_TOS_ACCESS_KEY"), + sk=app_config.get("VOLCENGINE_TOS_SECRET_KEY"), + endpoint=app_config.get("VOLCENGINE_TOS_ENDPOINT"), + region=app_config.get("VOLCENGINE_TOS_REGION"), + ) + + def save(self, filename, data): + self.client.put_object(bucket=self.bucket_name, key=filename, content=data) + + def load_once(self, filename: str) -> bytes: + data = self.client.get_object(bucket=self.bucket_name, key=filename).read() + return data + + def load_stream(self, filename: str) -> Generator: + def generate(filename: str = filename) -> Generator: + response = self.client.get_object(bucket=self.bucket_name, key=filename) + while chunk := response.read(4096): + yield chunk + + return generate() + + def download(self, filename, target_filepath): + self.client.get_object_to_file(bucket=self.bucket_name, key=filename, file_path=target_filepath) + + def exists(self, filename): + res = self.client.head_object(bucket=self.bucket_name, key=filename) + if res.status_code != 200: + return False + return True + + def delete(self, filename): + self.client.delete_object(bucket=self.bucket_name, key=filename) diff --git a/api/poetry.lock b/api/poetry.lock index 21708795b1..103423e5c7 100644 --- a/api/poetry.lock +++ b/api/poetry.lock @@ -9036,6 +9036,23 @@ files = [ {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, ] +[[package]] +name = "tos" +version = "2.7.1" +description = "Volc TOS (Tinder Object Storage) SDK" +optional = false +python-versions = "*" +files = [ + {file = "tos-2.7.1.tar.gz", hash = "sha256:4bccdbff3cfd63eb44648bb44862903708c4b3e790f0dd55c96305baaeece805"}, +] + +[package.dependencies] +crcmod = ">=1.7" +Deprecated = ">=1.2.13,<2.0.0" +pytz = "*" +requests = ">=2.19.1,<3.dev0" +six = "*" + [[package]] name = "tqdm" version = "4.66.5" @@ -10371,4 +10388,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "c3c637d643f4dcb3e35d0e7f2a3a4fbaf2a730512a4ca31adce5884c94f07f57" +content-hash = "2dbff415c3c9ca95c8dcfb59fc088ce2c0d00037c44f386a34c87c98e1d8b942" diff --git a/api/pyproject.toml b/api/pyproject.toml index 826b7debc4..69d1fc4ee0 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -192,6 +192,7 @@ azure-ai-ml = "^1.19.0" azure-ai-inference = "^1.0.0b3" volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"} oci = "^2.133.0" +tos = "^2.7.1" [tool.poetry.group.indriect.dependencies] kaleido = "0.2.1" rank-bm25 = "~0.2.2" diff --git a/docker/.env.example b/docker/.env.example index 6543138889..ca24c667f6 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -329,6 +329,18 @@ HUAWEI_OBS_ACCESS_KEY=your-access-key # The server url of the HUAWEI OBS service. HUAWEI_OBS_SERVER=your-server-url +# Volcengine TOS Configuration +# The name of the Volcengine TOS bucket to use for storing files. +VOLCENGINE_TOS_BUCKET_NAME=your-bucket-name +# The secret key to use for authenticating with the Volcengine TOS service. +VOLCENGINE_TOS_SECRET_KEY=your-secret-key +# The access key to use for authenticating with the Volcengine TOS service. +VOLCENGINE_TOS_ACCESS_KEY=your-access-key +# The endpoint of the Volcengine TOS service. +VOLCENGINE_TOS_ENDPOINT=your-server-url +# The region of the Volcengine TOS service. +VOLCENGINE_TOS_REGION=your-region + # ------------------------------ # Vector Database Configuration # ------------------------------ diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 4a35b92c84..aca87a6f8f 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -90,6 +90,11 @@ x-shared-env: &shared-api-worker-env OCI_ACCESS_KEY: ${OCI_ACCESS_KEY:-} OCI_SECRET_KEY: ${OCI_SECRET_KEY:-} OCI_REGION: ${OCI_REGION:-} + VOLCENGINE_TOS_BUCKET_NAME: ${VOLCENGINE_TOS_BUCKET_NAME:-} + VOLCENGINE_TOS_SECRET_KEY: ${VOLCENGINE_TOS_SECRET_KEY:-} + VOLCENGINE_TOS_ACCESS_KEY: ${VOLCENGINE_TOS_ACCESS_KEY:-} + VOLCENGINE_TOS_ENDPOINT: ${VOLCENGINE_TOS_ENDPOINT:-} + VOLCENGINE_TOS_REGION: ${VOLCENGINE_TOS_REGION:-} VECTOR_STORE: ${VECTOR_STORE:-weaviate} WEAVIATE_ENDPOINT: ${WEAVIATE_ENDPOINT:-http://weaviate:8080} WEAVIATE_API_KEY: ${WEAVIATE_API_KEY:-WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih}