fix(workflow): enhance prompt handling with vision support (#9790)
Some checks are pending
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/amd64, build-api-amd64) (push) Waiting to run
Build and Push API & Web / build (api, DIFY_API_IMAGE_NAME, linux/arm64, build-api-arm64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/amd64, build-web-amd64) (push) Waiting to run
Build and Push API & Web / build (web, DIFY_WEB_IMAGE_NAME, linux/arm64, build-web-arm64) (push) Waiting to run
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Blocked by required conditions
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Blocked by required conditions

This commit is contained in:
-LAN- 2024-10-24 17:52:11 +08:00 committed by GitHub
parent e54b7cda3d
commit d018b32d0b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 8 additions and 1 deletions

View File

@ -127,9 +127,10 @@ class LLMNode(BaseNode[LLMNodeData]):
context=context,
memory=memory,
model_config=model_config,
vision_detail=self.node_data.vision.configs.detail,
prompt_template=self.node_data.prompt_template,
memory_config=self.node_data.memory,
vision_enabled=self.node_data.vision.enabled,
vision_detail=self.node_data.vision.configs.detail,
)
process_data = {
@ -518,6 +519,7 @@ class LLMNode(BaseNode[LLMNodeData]):
model_config: ModelConfigWithCredentialsEntity,
prompt_template: Sequence[LLMNodeChatModelMessage] | LLMNodeCompletionModelPromptTemplate,
memory_config: MemoryConfig | None = None,
vision_enabled: bool = False,
vision_detail: ImagePromptMessageContent.DETAIL,
) -> tuple[list[PromptMessage], Optional[list[str]]]:
inputs = inputs or {}
@ -542,6 +544,10 @@ class LLMNode(BaseNode[LLMNodeData]):
if not isinstance(prompt_message.content, str):
prompt_message_content = []
for content_item in prompt_message.content or []:
# Skip image if vision is disabled
if not vision_enabled and content_item.type == PromptMessageContentType.IMAGE:
continue
if isinstance(content_item, ImagePromptMessageContent):
# Override vision config if LLM node has vision config,
# cuz vision detail is related to the configuration from FileUpload feature.

View File

@ -88,6 +88,7 @@ class QuestionClassifierNode(LLMNode):
memory=memory,
model_config=model_config,
files=files,
vision_enabled=node_data.vision.enabled,
vision_detail=node_data.vision.configs.detail,
)