From 128a66f7fe8383cb30dec5e5e3896d75a225c77e Mon Sep 17 00:00:00 2001
From: cx <88480957+free-cx@users.noreply.github.com>
Date: Thu, 26 Sep 2024 16:34:40 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20Ollama=20modelfeature=20set=20vision,=20?=
 =?UTF-8?q?and=20an=20exception=20occurred=20at=20the=E2=80=A6=20(#8783)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../model_providers/ollama/llm/llm.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/api/core/model_runtime/model_providers/ollama/llm/llm.py b/api/core/model_runtime/model_providers/ollama/llm/llm.py
index ff732e6925..a7ea53e0e9 100644
--- a/api/core/model_runtime/model_providers/ollama/llm/llm.py
+++ b/api/core/model_runtime/model_providers/ollama/llm/llm.py
@@ -364,14 +364,21 @@ class OllamaLargeLanguageModel(LargeLanguageModel):
 
             if chunk_json["done"]:
                 # calculate num tokens
-                if "prompt_eval_count" in chunk_json and "eval_count" in chunk_json:
-                    # transform usage
+                if "prompt_eval_count" in chunk_json:
                     prompt_tokens = chunk_json["prompt_eval_count"]
-                    completion_tokens = chunk_json["eval_count"]
                 else:
-                    # calculate num tokens
-                    prompt_tokens = self._get_num_tokens_by_gpt2(prompt_messages[0].content)
-                    completion_tokens = self._get_num_tokens_by_gpt2(full_text)
+                    prompt_message_content = prompt_messages[0].content
+                    if isinstance(prompt_message_content, str):
+                        prompt_tokens = self._get_num_tokens_by_gpt2(prompt_message_content)
+                    else:
+                        content_text = ""
+                        for message_content in prompt_message_content:
+                            if message_content.type == PromptMessageContentType.TEXT:
+                                message_content = cast(TextPromptMessageContent, message_content)
+                                content_text += message_content.data
+                        prompt_tokens = self._get_num_tokens_by_gpt2(content_text)
+
+                    completion_tokens = chunk_json.get("eval_count", self._get_num_tokens_by_gpt2(full_text))
 
                 # transform usage
                 usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
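
Reviewer note (not part of the patch): when the Ollama model has the vision
feature enabled, `prompt_messages[0].content` is a list of content parts
rather than a plain string, so the old fallback
`self._get_num_tokens_by_gpt2(prompt_messages[0].content)` raised an
exception whenever the final stream chunk lacked `prompt_eval_count`. The
old code also required both counters before using either one. Below is a
minimal, self-contained sketch of the fixed accounting logic;
`finalize_usage`, `count_tokens`, `TextContent`, and `ImageContent` are
hypothetical stand-ins for Dify's `_get_num_tokens_by_gpt2` helper and its
`PromptMessageContent` classes, and only the control flow mirrors the hunk
above.

    from dataclasses import dataclass
    from typing import Union

    @dataclass
    class TextContent:
        type: str   # "text"
        data: str   # the text itself

    @dataclass
    class ImageContent:
        type: str   # "image"
        data: str   # e.g. a base64 payload; never counted

    def count_tokens(text: str) -> int:
        # Crude stand-in for Dify's GPT-2 based _get_num_tokens_by_gpt2.
        return len(text.split())

    def finalize_usage(chunk_json: dict,
                       prompt_content: Union[str, list],
                       full_text: str) -> tuple[int, int]:
        # Prefer Ollama's own counter; it may be missing from the final chunk.
        if "prompt_eval_count" in chunk_json:
            prompt_tokens = chunk_json["prompt_eval_count"]
        elif isinstance(prompt_content, str):
            prompt_tokens = count_tokens(prompt_content)
        else:
            # Vision prompts are lists of parts; only the text parts are
            # countable, which is what the patch concatenates before counting.
            text = "".join(p.data for p in prompt_content if p.type == "text")
            prompt_tokens = count_tokens(text)

        # eval_count can be absent independently; fall back to the streamed text.
        completion_tokens = chunk_json.get("eval_count", count_tokens(full_text))
        return prompt_tokens, completion_tokens

    # Example: a vision request whose final chunk reports eval_count only.
    parts = [TextContent("text", "Describe this image."),
             ImageContent("image", "<base64 data>")]
    print(finalize_usage({"done": True, "eval_count": 42}, parts, "A cat."))
    # -> (3, 42): the image part is skipped, eval_count is used as-is.

The key design point the patch adopts is decoupling the two counters: each
falls back to local counting on its own, instead of discarding a counter
Ollama did report just because the other one was missing.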