feat: add cogVideo tool (#10456)

2024-11-16 03:32:23 +08:00 · 2024-11-08 17:04:05 +08:00 · 2024-11-08 17:04:05 +08:00 · 4fe5297e35
commit 4fe5297e35
parent 22dee4f6f3
4 changed files with 107 additions and 0 deletions
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo.py
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.py
@ -0,0 +1,24 @@
+from typing import Any, Union
+
+from zhipuai import ZhipuAI
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class CogVideoTool(BuiltinTool):
+    def _invoke(
+        self, user_id: str, tool_parameters: dict[str, Any]
+    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        client = ZhipuAI(
+            base_url=self.runtime.credentials["zhipuai_base_url"],
+            api_key=self.runtime.credentials["zhipuai_api_key"],
+        )
+        if not tool_parameters.get("prompt") and not tool_parameters.get("image_url"):
+            return self.create_text_message("require at least one of prompt and image_url")
+
+        response = client.videos.generations(
+            model="cogvideox", prompt=tool_parameters.get("prompt"), image_url=tool_parameters.get("image_url")
+        )
+
+        return self.create_json_message(response.dict())
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo.yaml
@ -0,0 +1,32 @@
+identity:
+  name: cogvideo
+  author: hjlarry
+  label:
+    en_US: CogVideo
+    zh_Hans: CogVideo 视频生成
+description:
+  human:
+    en_US: Use the CogVideox model provided by ZhipuAI to generate videos based on user prompts and images.
+    zh_Hans: 使用智谱cogvideox模型，根据用户输入的提示词和图片，生成视频。
+  llm: A tool for generating videos. The input is user's prompt or image url or both of them, the output is a task id. You can use another tool with this task id to check the status and get the video.
+parameters:
+  - name: prompt
+    type: string
+    label:
+      en_US: prompt
+      zh_Hans: 提示词
+    human_description:
+      en_US: The prompt text used to generate video.
+      zh_Hans: 用于生成视频的提示词。
+    llm_description: The prompt text used to generate video. Optional.
+    form: llm
+  - name: image_url
+    type: string
+    label:
+      en_US: image url
+      zh_Hans: 图片链接
+    human_description:
+      en_US: The image url used to generate video.
+      zh_Hans: 输入一个图片链接，生成的视频将基于该图片和提示词。
+    llm_description: The image url used to generate video. Optional.
+    form: llm
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.py
@ -0,0 +1,30 @@
+from typing import Any, Union
+
+import httpx
+from zhipuai import ZhipuAI
+
+from core.tools.entities.tool_entities import ToolInvokeMessage
+from core.tools.tool.builtin_tool import BuiltinTool
+
+
+class CogVideoJobTool(BuiltinTool):
+    def _invoke(
+        self, user_id: str, tool_parameters: dict[str, Any]
+    ) -> Union[ToolInvokeMessage, list[ToolInvokeMessage]]:
+        client = ZhipuAI(
+            api_key=self.runtime.credentials["zhipuai_api_key"],
+            base_url=self.runtime.credentials["zhipuai_base_url"],
+        )
+
+        response = client.videos.retrieve_videos_result(id=tool_parameters.get("id"))
+        result = [self.create_json_message(response.dict())]
+        if response.task_status == "SUCCESS":
+            for item in response.video_result:
+                video_cover_image = self.create_image_message(item.cover_image_url)
+                result.append(video_cover_image)
+                video = self.create_blob_message(
+                    blob=httpx.get(item.url).content, meta={"mime_type": "video/mp4"}, save_as=self.VariableKey.VIDEO
+                )
+                result.append(video)
+
+        return result
--- a/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml
+++ b/api/core/tools/provider/builtin/cogview/tools/cogvideo_job.yaml
@ -0,0 +1,21 @@
+identity:
+  name: cogvideo_job
+  author: hjlarry
+  label:
+    en_US: CogVideo Result
+    zh_Hans: CogVideo 结果获取
+description:
+  human:
+    en_US: Get the result of CogVideo tool generation.
+    zh_Hans: 根据 CogVideo 工具返回的 id 获取视频生成结果。
+  llm: Get the result of CogVideo tool generation. The input is the id which is returned by the CogVideo tool. The output is the url of video and video cover image.
+parameters:
+  - name: id
+    type: string
+    # The tool cannot look anything up without a task id, so mark it mandatory.
+    required: true
+    label:
+      en_US: id
+      zh_Hans: 任务 id
+    human_description:
+      en_US: The id returned by the CogVideo.
+      zh_Hans: CogVideo 工具返回的 id。
+    llm_description: The id returned by the cogvideo.
+    form: llm