From ace7ffab5f37e381ea173f2813cf688dff322be1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9D=9E=E6=B3=95=E6=93=8D=E4=BD=9C?= Date: Fri, 25 Oct 2024 18:48:07 +0800 Subject: [PATCH] feat: support comfyui workflow tool image generate image (#9871) --- .../builtin/comfyui/tools/comfyui_client.py | 37 ++++++++++++------- .../builtin/comfyui/tools/comfyui_workflow.py | 8 ++-- .../comfyui/tools/comfyui_workflow.yaml | 7 ++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py index a41d34d40f..d4bf713441 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_client.py @@ -1,3 +1,5 @@ +import base64 +import io import json import random import uuid @@ -6,45 +8,48 @@ import httpx from websocket import WebSocket from yarl import URL +from core.file.file_manager import _get_encoded_string +from core.file.models import File + class ComfyUiClient: def __init__(self, base_url: str): self.base_url = URL(base_url) - def get_history(self, prompt_id: str): + def get_history(self, prompt_id: str) -> dict: res = httpx.get(str(self.base_url / "history"), params={"prompt_id": prompt_id}) history = res.json()[prompt_id] return history - def get_image(self, filename: str, subfolder: str, folder_type: str): + def get_image(self, filename: str, subfolder: str, folder_type: str) -> bytes: response = httpx.get( str(self.base_url / "view"), params={"filename": filename, "subfolder": subfolder, "type": folder_type}, ) return response.content - def upload_image(self, input_path: str, name: str, image_type: str = "input", overwrite: bool = False): - # plan to support img2img in dify 0.10.0 - with open(input_path, "rb") as file: - files = {"image": (name, file, "image/png")} - data = {"type": image_type, "overwrite": str(overwrite).lower()} + def upload_image(self, image_file: File) -> dict: + image_content = base64.b64decode(_get_encoded_string(image_file)) + file = io.BytesIO(image_content) + files = {"image": (image_file.filename, file, image_file.mime_type), "overwrite": "true"} + res = httpx.post(str(self.base_url / "upload/image"), files=files) + return res.json() - res = httpx.post(str(self.base_url / "upload/image"), data=data, files=files) - return res - - def queue_prompt(self, client_id: str, prompt: dict): + def queue_prompt(self, client_id: str, prompt: dict) -> str: res = httpx.post(str(self.base_url / "prompt"), json={"client_id": client_id, "prompt": prompt}) prompt_id = res.json()["prompt_id"] return prompt_id - def open_websocket_connection(self): + def open_websocket_connection(self) -> tuple[WebSocket, str]: client_id = str(uuid.uuid4()) ws = WebSocket() ws_address = f"ws://{self.base_url.authority}/ws?clientId={client_id}" ws.connect(ws_address) return ws, client_id - def set_prompt(self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = ""): + def set_prompt( + self, origin_prompt: dict, positive_prompt: str, negative_prompt: str = "", image_name: str = "" + ) -> dict: """ find the first KSampler, then can find the prompt node through it. """ @@ -58,6 +63,10 @@ class ComfyUiClient: if negative_prompt != "": negative_input_id = prompt.get(k_sampler)["inputs"]["negative"][0] prompt.get(negative_input_id)["inputs"]["text"] = negative_prompt + + if image_name != "": + image_loader = [key for key, value in id_to_class_type.items() if value == "LoadImage"][0] + prompt.get(image_loader)["inputs"]["image"] = image_name return prompt def track_progress(self, prompt: dict, ws: WebSocket, prompt_id: str): @@ -89,7 +98,7 @@ class ComfyUiClient: else: continue - def generate_image_by_prompt(self, prompt: dict): + def generate_image_by_prompt(self, prompt: dict) -> list[bytes]: try: ws, client_id = self.open_websocket_connection() prompt_id = self.queue_prompt(client_id, prompt) diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py index e4df9f8c3b..11320d5d0f 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.py @@ -2,10 +2,9 @@ import json from typing import Any from core.tools.entities.tool_entities import ToolInvokeMessage +from core.tools.provider.builtin.comfyui.tools.comfyui_client import ComfyUiClient from core.tools.tool.builtin_tool import BuiltinTool -from .comfyui_client import ComfyUiClient - class ComfyUIWorkflowTool(BuiltinTool): def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]: @@ -14,13 +13,16 @@ class ComfyUIWorkflowTool(BuiltinTool): positive_prompt = tool_parameters.get("positive_prompt") negative_prompt = tool_parameters.get("negative_prompt") workflow = tool_parameters.get("workflow_json") + image_name = "" + if image := tool_parameters.get("image"): + image_name = comfyui.upload_image(image).get("name") try: origin_prompt = json.loads(workflow) except: return self.create_text_message("the Workflow JSON is not correct") - prompt = comfyui.set_prompt(origin_prompt, positive_prompt, negative_prompt) + prompt = comfyui.set_prompt(origin_prompt, positive_prompt, negative_prompt, image_name) images = comfyui.generate_image_by_prompt(prompt) result = [] for img in images: diff --git a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.yaml b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.yaml index 6342d6d468..55fcdad825 100644 --- a/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.yaml +++ b/api/core/tools/provider/builtin/comfyui/tools/comfyui_workflow.yaml @@ -24,6 +24,13 @@ parameters: zh_Hans: 负面提示词 llm_description: Negative prompt, you should describe the image you don't want to generate as a list of words as possible as detailed, the prompt must be written in English. form: llm + - name: image + type: file + label: + en_US: Input Image + zh_Hans: 输入的图片 + llm_description: The input image, used to transfer to the comfyui workflow to generate another image. + form: llm - name: workflow_json type: string required: true