feat: enhance image handling in prompt processing

Updated the image-processing logic to check whether the model supports vision features, preventing errors when images are passed to models that do not support them. Added a test scenario covering the case where vision features are absent. This makes image handling more robust and avoids unexpected behavior in image-related prompts.
-LAN- 2024-11-14 20:33:44 +08:00
parent 6872b32c7d
commit 87f78ff582
2 changed files with 33 additions and 3 deletions
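
In isolation, the new guard amounts to the following. This is a minimal sketch: `should_skip_image` is a hypothetical helper name, not a function in the diff, and the import paths follow the hunks below.

    from core.model_runtime.entities.message_entities import PromptMessageContentType
    from core.model_runtime.entities.model_entities import ModelFeature

    def should_skip_image(content_item, vision_enabled: bool, model_config) -> bool:
        # Hypothetical helper mirroring the inline condition added to LLMNode.
        # An IMAGE item is dropped when vision is disabled on the node, when the
        # model declares no features at all, or when VISION is not among them.
        if content_item.type != PromptMessageContentType.IMAGE:
            return False
        features = model_config.model_schema.features
        return not vision_enabled or not features or ModelFeature.VISION not in features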


@@ -24,7 +24,7 @@ from core.model_runtime.entities.message_entities import (
     SystemPromptMessage,
     UserPromptMessage,
 )
-from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
+from core.model_runtime.entities.model_entities import ModelFeature, ModelPropertyKey, ModelType
 from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
 from core.model_runtime.utils.encoders import jsonable_encoder
 from core.prompt.entities.advanced_prompt_entities import CompletionModelPromptTemplate, MemoryConfig
@@ -607,8 +607,12 @@ class LLMNode(BaseNode[LLMNodeData]):
         if isinstance(prompt_message.content, list):
             prompt_message_content = []
             for content_item in prompt_message.content:
-                # Skip image if vision is disabled
-                if not vision_enabled and content_item.type == PromptMessageContentType.IMAGE:
+                # Skip image if vision is disabled or model doesn't support vision
+                if content_item.type == PromptMessageContentType.IMAGE and (
+                    not vision_enabled
+                    or not model_config.model_schema.features
+                    or ModelFeature.VISION not in model_config.model_schema.features
+                ):
                     continue
                 prompt_message_content.append(content_item)
             if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
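
The practical effect for a model without the VISION feature: a mixed text-plus-image message loses its image parts, and if a single text part remains, the `len(...) == 1` branch above takes over. A hypothetical walkthrough, assuming the usual dify content entity constructors (the exact fields of `ImagePromptMessageContent` are an assumption here):

    from core.model_runtime.entities.message_entities import (
        ImagePromptMessageContent,
        TextPromptMessageContent,
        UserPromptMessage,
    )

    # A user message mixing text and an image.
    message = UserPromptMessage(
        content=[
            TextPromptMessageContent(data="Describe this picture."),
            ImagePromptMessageContent(data="https://example.com/cat.png"),
        ]
    )

    # With vision_enabled=True but model_schema.features == [], the new guard
    # drops the image item. Only the text item survives, and the single-TEXT
    # branch then presumably collapses the content back to a plain string
    # (its body lies outside this hunk), i.e. roughly:
    #   UserPromptMessage(content="Describe this picture.")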


@@ -397,6 +397,32 @@ def test_fetch_prompt_messages__basic(faker, llm_node, model_config):
                 )
             },
         ),
+        LLMNodeTestScenario(
+            description="Prompt template with variable selector of File without vision feature",
+            user_query=fake_query,
+            user_files=[],
+            vision_enabled=True,
+            vision_detail=fake_vision_detail,
+            features=[],
+            window_size=fake_window_size,
+            prompt_template=[
+                LLMNodeChatModelMessage(
+                    text="{{#input.image#}}",
+                    role=PromptMessageRole.USER,
+                    edition_type="basic",
+                ),
+            ],
+            expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
+            file_variables={
+                "input.image": File(
+                    tenant_id="test",
+                    type=FileType.IMAGE,
+                    filename="test1.jpg",
+                    transfer_method=FileTransferMethod.REMOTE_URL,
+                    remote_url=fake_remote_url,
+                )
+            },
+        ),
     ]
     for scenario in test_scenarios:
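
The added scenario exercises exactly the new half of the guard: `vision_enabled` is True but `features` is empty, so the image `File` must be dropped and `expected_messages` ends up text-only. The same rule can be checked in isolation by reusing the hypothetical `should_skip_image` sketch from above with throwaway stubs (all names below are illustrative, not the test harness's API):

    class _Schema:
        features = []  # model advertises no features

    class _ModelConfig:
        model_schema = _Schema()

    class _ImageItem:
        type = PromptMessageContentType.IMAGE

    # Vision is enabled on the node, yet the empty feature list rejects the image.
    assert should_skip_image(_ImageItem(), vision_enabled=True, model_config=_ModelConfig())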