fix: image token calc of OpenAI Compatible API (#3368)

This commit is contained in:
takatost 2024-04-11 20:29:48 +08:00 committed by GitHub
parent 693647a141
commit 9a1ea9ac03
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -416,7 +416,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOAI_API_Compat, LargeLanguageModel):
if chunk.startswith(':'): if chunk.startswith(':'):
continue continue
decoded_chunk = chunk.strip().lstrip('data: ').lstrip() decoded_chunk = chunk.strip().lstrip('data: ').lstrip()
chunk_json = None
try: try:
chunk_json = json.loads(decoded_chunk) chunk_json = json.loads(decoded_chunk)
# stream ended # stream ended
@@ -620,7 +620,7 @@ class OAIAPICompatLargeLanguageModel(_CommonOAI_API_Compat, LargeLanguageModel):
return message_dict return message_dict
def _num_tokens_from_string(self, model: str, text: str, def _num_tokens_from_string(self, model: str, text: Union[str, list[PromptMessageContent]],
tools: Optional[list[PromptMessageTool]] = None) -> int: tools: Optional[list[PromptMessageTool]] = None) -> int:
""" """
Approximate num tokens for model with gpt2 tokenizer. Approximate num tokens for model with gpt2 tokenizer.
@@ -630,7 +630,16 @@ class OAIAPICompatLargeLanguageModel(_CommonOAI_API_Compat, LargeLanguageModel):
:param tools: tools for tool calling :param tools: tools for tool calling
:return: number of tokens :return: number of tokens
""" """
num_tokens = self._get_num_tokens_by_gpt2(text) if isinstance(text, str):
full_text = text
else:
full_text = ''
for message_content in text:
if message_content.type == PromptMessageContentType.TEXT:
message_content = cast(PromptMessageContent, message_content)
full_text += message_content.data
num_tokens = self._get_num_tokens_by_gpt2(full_text)
if tools: if tools:
num_tokens += self._num_tokens_for_tools(tools) num_tokens += self._num_tokens_for_tools(tools)