From adc948e87c31c06f53522d5e398d20c98d734735 Mon Sep 17 00:00:00 2001
From: yanghx <30469680+yanghx-git@users.noreply.github.com>
Date: Thu, 13 Jun 2024 05:08:30 +0000
Subject: [PATCH] fix(api/core/model_runtime/model_providers/baichuan,localai):
 Parse ToolPromptMessage. #4943 (#5138)

Co-authored-by: -LAN-
---
 .../model_providers/baichuan/llm/llm.py | 46 ++++++---
 .../model_providers/localai/llm/llm.py  | 96 +++++++++++--------
 2 files changed, 88 insertions(+), 54 deletions(-)

diff --git a/api/core/model_runtime/model_providers/baichuan/llm/llm.py b/api/core/model_runtime/model_providers/baichuan/llm/llm.py
index 4278120093..edcd3af420 100644
--- a/api/core/model_runtime/model_providers/baichuan/llm/llm.py
+++ b/api/core/model_runtime/model_providers/baichuan/llm/llm.py
@@ -7,6 +7,7 @@ from core.model_runtime.entities.message_entities import (
     PromptMessage,
     PromptMessageTool,
     SystemPromptMessage,
+    ToolPromptMessage,
     UserPromptMessage,
 )
 from core.model_runtime.errors.invoke import (
@@ -32,20 +33,21 @@ from core.model_runtime.model_providers.baichuan.llm.baichuan_turbo_errors impor
 
 
 class BaichuanLarguageModel(LargeLanguageModel):
-    def _invoke(self, model: str, credentials: dict, 
-                prompt_messages: list[PromptMessage], model_parameters: dict, 
-                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, 
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
                 stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages,
-                                model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
+                              model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
 
     def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: list[PromptMessageTool] | None = None) -> int:
         return self._num_tokens_from_messages(prompt_messages)
 
-    def _num_tokens_from_messages(self, messages: list[PromptMessage],) -> int:
+    def _num_tokens_from_messages(self, messages: list[PromptMessage], ) -> int:
         """Calculate num tokens for baichuan model"""
+
         def tokens(text: str):
             return BaichuanTokenizer._get_num_tokens(text)
 
@@ -85,9 +87,20 @@ class BaichuanLarguageModel(LargeLanguageModel):
         elif isinstance(message, SystemPromptMessage):
             message = cast(SystemPromptMessage, message)
             message_dict = {"role": "user", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            # copy from core/model_runtime/model_providers/anthropic/llm/llm.py
+            message = cast(ToolPromptMessage, message)
+            message_dict = {
+                "role": "user",
+                "content": [{
+                    "type": "tool_result",
+                    "tool_use_id": message.tool_call_id,
+                    "content": message.content
+                }]
+            }
         else:
             raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return message_dict
 
     def validate_credentials(self, model: str, credentials: dict) -> None:
@@ -106,13 +119,13 @@ class BaichuanLarguageModel(LargeLanguageModel):
         except Exception as e:
             raise CredentialsValidateFailedError(f"Invalid API key: {e}")
 
-    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], 
-                  model_parameters: dict, tools: list[PromptMessageTool] | None = None, 
-                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
+    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                  model_parameters: dict, tools: list[PromptMessageTool] | None = None,
+                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         if tools is not None and len(tools) > 0:
             raise InvokeBadRequestError("Baichuan model doesn't support tools")
-        
+
         instance = BaichuanModel(
             api_key=credentials['api_key'],
             secret_key=credentials.get('secret_key', '')
         )
@@ -129,11 +142,12 @@ class BaichuanLarguageModel(LargeLanguageModel):
         ]
 
         # invoke model
-        response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters, timeout=60)
+        response = instance.generate(model=model, stream=stream, messages=messages, parameters=model_parameters,
+                                     timeout=60)
 
         if stream:
             return self._handle_chat_generate_stream_response(model, prompt_messages, credentials, response)
-        
+
         return self._handle_chat_generate_response(model, prompt_messages, credentials, response)
 
     def _handle_chat_generate_response(self, model: str,
@@ -141,7 +155,9 @@ class BaichuanLarguageModel(LargeLanguageModel):
                                        credentials: dict,
                                        response: BaichuanMessage) -> LLMResult:
         # convert baichuan message to llm result
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=response.usage['prompt_tokens'], completion_tokens=response.usage['completion_tokens'])
+        usage = self._calc_response_usage(model=model, credentials=credentials,
+                                          prompt_tokens=response.usage['prompt_tokens'],
+                                          completion_tokens=response.usage['completion_tokens'])
         return LLMResult(
             model=model,
             prompt_messages=prompt_messages,
@@ -158,7 +174,9 @@ class BaichuanLarguageModel(LargeLanguageModel):
                                               response: Generator[BaichuanMessage, None, None]) -> Generator:
         for message in response:
             if message.usage:
-                usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=message.usage['prompt_tokens'], completion_tokens=message.usage['completion_tokens'])
+                usage = self._calc_response_usage(model=model, credentials=credentials,
+                                                  prompt_tokens=message.usage['prompt_tokens'],
+                                                  completion_tokens=message.usage['completion_tokens'])
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
diff --git a/api/core/model_runtime/model_providers/localai/llm/llm.py b/api/core/model_runtime/model_providers/localai/llm/llm.py
index 2a3fbb5c57..92c14449e4 100644
--- a/api/core/model_runtime/model_providers/localai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/localai/llm/llm.py
@@ -27,6 +27,7 @@ from core.model_runtime.entities.message_entities import (
     PromptMessage,
     PromptMessageTool,
     SystemPromptMessage,
+    ToolPromptMessage,
     UserPromptMessage,
 )
 from core.model_runtime.entities.model_entities import (
@@ -51,13 +52,13 @@ from core.model_runtime.utils import helper
 
 
 class LocalAILanguageModel(LargeLanguageModel):
-    def _invoke(self, model: str, credentials: dict, 
-                prompt_messages: list[PromptMessage], model_parameters: dict, 
-                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None, 
+    def _invoke(self, model: str, credentials: dict,
+                prompt_messages: list[PromptMessage], model_parameters: dict,
+                tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
                 stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
         return self._generate(model=model, credentials=credentials, prompt_messages=prompt_messages,
-                                model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
+                              model_parameters=model_parameters, tools=tools, stop=stop, stream=stream, user=user)
 
     def get_num_tokens(self,
                        model: str, credentials: dict, prompt_messages: list[PromptMessage],
                        tools: list[PromptMessageTool] | None = None) -> int:
@@ -67,8 +68,9 @@ class LocalAILanguageModel(LargeLanguageModel):
     def _num_tokens_from_messages(self, messages: list[PromptMessage], tools: list[PromptMessageTool]) -> int:
         """
            Calculate num tokens for baichuan model
-           LocalAI does not supports 
+           LocalAI does not supports
         """
+
         def tokens(text: str):
             """
                We cloud not determine which tokenizer to use, cause the model is customized.
@@ -124,7 +126,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             num_tokens += self._num_tokens_for_tools(tools)
 
         return num_tokens
-    
+
     def _num_tokens_for_tools(self, tools: list[PromptMessageTool]) -> int:
         """
         Calculate num tokens for tool calling
@@ -133,6 +135,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         :param tools: tools for tool calling
         :return: number of tokens
         """
+
         def tokens(text: str):
             return self._get_num_tokens_by_gpt2(text)
 
@@ -193,7 +196,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             completion_model = LLMMode.COMPLETION.value
         else:
             raise ValueError(f"Unknown completion type {credentials['completion_type']}")
-        
+
         rules = [
             ParameterRule(
                 name='temperature',
@@ -227,7 +230,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             )
         ]
 
-        model_properties = { 
+        model_properties = {
             ModelPropertyKey.MODE: completion_model,
         } if completion_model else {}
 
@@ -246,11 +249,11 @@ class LocalAILanguageModel(LargeLanguageModel):
 
         return entity
 
-    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], 
-                  model_parameters: dict, tools: list[PromptMessageTool] | None = None, 
-                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
+    def _generate(self, model: str, credentials: dict, prompt_messages: list[PromptMessage],
+                  model_parameters: dict, tools: list[PromptMessageTool] | None = None,
+                  stop: list[str] | None = None, stream: bool = True, user: str | None = None) \
             -> LLMResult | Generator:
-        
+
         kwargs = self._to_client_kwargs(credentials)
         # init model client
         client = OpenAI(**kwargs)
@@ -271,7 +274,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             extra_model_kwargs['functions'] = [
                 helper.dump_model(tool) for tool in tools
             ]
-        
+
         if completion_type == 'chat_completion':
             result = client.chat.completions.create(
                 messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
@@ -294,24 +297,24 @@ class LocalAILanguageModel(LargeLanguageModel):
         if stream:
             if completion_type == 'completion':
                 return self._handle_completion_generate_stream_response(
-                    model=model, credentials=credentials, response=result, tools=tools, 
+                    model=model, credentials=credentials, response=result, tools=tools,
                     prompt_messages=prompt_messages
                 )
             return self._handle_chat_generate_stream_response(
-                model=model, credentials=credentials, response=result, tools=tools, 
+                model=model, credentials=credentials, response=result, tools=tools,
                 prompt_messages=prompt_messages
             )
-        
+
         if completion_type == 'completion':
             return self._handle_completion_generate_response(
-                model=model, credentials=credentials, response=result, 
+                model=model, credentials=credentials, response=result,
                 prompt_messages=prompt_messages
             )
         return self._handle_chat_generate_response(
-            model=model, credentials=credentials, response=result, tools=tools, 
+            model=model, credentials=credentials, response=result, tools=tools,
             prompt_messages=prompt_messages
         )
-    
+
     def _to_client_kwargs(self, credentials: dict) -> dict:
         """
         Convert invoke kwargs to client kwargs
@@ -321,7 +324,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if not credentials['server_url'].endswith('/'):
             credentials['server_url'] += '/'
-        
+
         client_kwargs = {
             "timeout": Timeout(315.0, read=300.0, write=10.0, connect=5.0),
             "api_key": "1",
@@ -351,9 +354,20 @@ class LocalAILanguageModel(LargeLanguageModel):
         elif isinstance(message, SystemPromptMessage):
             message = cast(SystemPromptMessage, message)
             message_dict = {"role": "system", "content": message.content}
+        elif isinstance(message, ToolPromptMessage):
+            # copy from core/model_runtime/model_providers/anthropic/llm/llm.py
+            message = cast(ToolPromptMessage, message)
+            message_dict = {
+                "role": "user",
+                "content": [{
+                    "type": "tool_result",
+                    "tool_use_id": message.tool_call_id,
+                    "content": message.content
+                }]
+            }
         else:
             raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return message_dict
 
     def _convert_prompt_message_to_completion_prompts(self, messages: list[PromptMessage]) -> str:
@@ -373,14 +387,14 @@ class LocalAILanguageModel(LargeLanguageModel):
                 prompts += f'{message.content}\n'
             else:
                 raise ValueError(f"Unknown message type {type(message)}")
-        
+
         return prompts
 
     def _handle_completion_generate_response(self, model: str,
-            prompt_messages: list[PromptMessage], 
-            credentials: dict, 
-            response: Completion, 
-        ) -> LLMResult:
+                                             prompt_messages: list[PromptMessage],
+                                             credentials: dict,
+                                             response: Completion,
+                                             ) -> LLMResult:
         """
         Handle llm chat response
 
@@ -393,7 +407,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if len(response.choices) == 0:
             raise InvokeServerUnavailableError("Empty response")
-        
+
         assistant_message = response.choices[0].text
 
         # transform assistant message to prompt message
@@ -407,7 +421,8 @@ class LocalAILanguageModel(LargeLanguageModel):
         )
 
         completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=[])
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens,
+                                          completion_tokens=completion_tokens)
 
         response = LLMResult(
             model=model,
@@ -436,7 +451,7 @@ class LocalAILanguageModel(LargeLanguageModel):
         """
         if len(response.choices) == 0:
             raise InvokeServerUnavailableError("Empty response")
-        
+
         assistant_message = response.choices[0].message
 
         # convert function call to tool call
@@ -452,7 +467,8 @@ class LocalAILanguageModel(LargeLanguageModel):
         prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
         completion_tokens = self._num_tokens_from_messages(messages=[assistant_prompt_message], tools=tools)
 
-        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
+        usage = self._calc_response_usage(model=model, credentials=credentials, prompt_tokens=prompt_tokens,
+                                          completion_tokens=completion_tokens)
 
         response = LLMResult(
             model=model,
@@ -465,10 +481,10 @@ class LocalAILanguageModel(LargeLanguageModel):
         return response
 
     def _handle_completion_generate_stream_response(self, model: str,
-            prompt_messages: list[PromptMessage], 
-            credentials: dict, 
-            response: Stream[Completion], 
-            tools: list[PromptMessageTool]) -> Generator:
+                                                    prompt_messages: list[PromptMessage],
+                                                    credentials: dict,
+                                                    response: Stream[Completion],
+                                                    tools: list[PromptMessageTool]) -> Generator:
         full_response = ''
 
         for chunk in response:
@@ -496,9 +512,9 @@ class LocalAILanguageModel(LargeLanguageModel):
 
             completion_tokens = self._num_tokens_from_messages(messages=[temp_assistant_prompt_message], tools=[])
 
-            usage = self._calc_response_usage(model=model, credentials=credentials, 
+            usage = self._calc_response_usage(model=model, credentials=credentials,
                                               prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
-            
+
             yield LLMResultChunk(
                 model=model,
                 prompt_messages=prompt_messages,
@@ -538,7 +554,7 @@ class LocalAILanguageModel(LargeLanguageModel):
 
             if delta.finish_reason is None and (delta.delta.content is None or delta.delta.content == ''):
                 continue
-            
+
             # check if there is a tool call in the response
             function_calls = None
             if delta.delta.function_call:
@@ -562,9 +578,9 @@ class LocalAILanguageModel(LargeLanguageModel):
                 prompt_tokens = self._num_tokens_from_messages(messages=prompt_messages, tools=tools)
                 completion_tokens = self._num_tokens_from_messages(messages=[temp_assistant_prompt_message], tools=[])
 
-                usage = self._calc_response_usage(model=model, credentials=credentials, 
+                usage = self._calc_response_usage(model=model, credentials=credentials,
                                                   prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
-                
+
                 yield LLMResultChunk(
                     model=model,
                     prompt_messages=prompt_messages,
@@ -613,7 +629,7 @@ class LocalAILanguageModel(LargeLanguageModel):
             )
             tool_calls.append(tool_call)
 
-        return tool_calls 
+        return tool_calls
 
     @property
     def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
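
Reviewer note: the sketch below is illustrative only and is not part of the patch. It mirrors the `tool_result` dict shape built by the new `ToolPromptMessage` branch added to `_convert_prompt_message_to_dict` in both providers (the branch the in-diff comment says was copied from the Anthropic provider). `SimpleToolPromptMessage` is a hypothetical stand-in for `core.model_runtime.entities.message_entities.ToolPromptMessage`, assumed here to carry only the `tool_call_id` and `content` fields used in the diff.

```python
# Illustrative sketch only (not part of the patch).
# SimpleToolPromptMessage is a hypothetical stand-in for ToolPromptMessage from
# core.model_runtime.entities.message_entities; the returned dict mirrors the
# branch this patch adds to _convert_prompt_message_to_dict.
from dataclasses import dataclass


@dataclass
class SimpleToolPromptMessage:
    tool_call_id: str
    content: str


def convert_tool_message_to_dict(message: SimpleToolPromptMessage) -> dict:
    # A tool result is sent back as a user-role message holding a single
    # "tool_result" block, keyed by the id of the tool call that produced it.
    return {
        "role": "user",
        "content": [{
            "type": "tool_result",
            "tool_use_id": message.tool_call_id,
            "content": message.content,
        }],
    }


if __name__ == "__main__":
    msg = SimpleToolPromptMessage(tool_call_id="call_123", content='{"temperature": "23C"}')
    print(convert_tool_message_to_dict(msg))
```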