add new provider Solar (#6884)

JuHyung Son 2024-08-02 21:48:09 +09:00 committed by GitHub
parent 541bf1db5a
commit 2e941bb91c
22 changed files with 1328 additions and 2 deletions

View File

@ -6,6 +6,7 @@
- nvidia
- nvidia_nim
- cohere
- upstage
- bedrock
- togetherai
- openrouter

View File

@ -0,0 +1,14 @@
<svg width="500" height="162" viewBox="0 0 500 162" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M42.5305 42L40.0623 50.518H16.3342L18.7422 42H42.5305ZM38.0367 59.8974L29.007 92.1937C27.2612 98.4243 29.007 102.277 33.9432 102.277C38.8795 102.277 43.575 98.5146 52.8455 88.5518L61.1227 59.8974H71.9584L61.1227 98.0631C60.28 100.862 61.1227 102.277 63.5909 102.277C66.1596 102.277 71.0349 100.118 82.6087 93.5465L89.8095 68.4768H77.9805L80.4486 59.8986H103.264L101.006 67.634L100.404 69.6808C102.382 67.3205 104.843 65.4115 107.621 64.0824C110.398 62.7532 113.429 62.0348 116.507 61.9754C128.848 61.9754 136.945 71.878 136.945 85.9042C136.945 102.338 125.778 114.769 111.029 114.769C106.921 114.842 102.895 113.609 99.5315 111.248L98.9596 110.826L98.7188 110.616L97.7255 109.743C94.829 106.928 92.7933 103.347 91.8562 99.4187L81.9837 133.762H71.0577L79.4382 104.585C69.4379 110.003 64.3802 111.668 60.1295 111.668C51.4308 111.668 47.9092 106.28 50.4977 96.8892L50.7385 95.9561C41.3476 107.334 35.2375 111.668 28.4351 111.668C19.4054 111.668 14.6798 103.812 17.7499 93.2472L24.7931 68.4756H25.1522L25.1604 68.4456H0L2.40793 59.8974L27.4999 59.8974H38.0367ZM97.0332 91.1414C97.0332 100.322 102.511 105.679 110.096 105.679C119.577 105.679 126.621 97.0409 126.621 85.7838C126.621 76.8143 121.564 71.0352 113.889 71.0352C104.287 71.0352 97.0332 80.5165 97.0332 91.1414ZM201.953 72.3305H216.822V63.692H201.953V46.3249H201.743L191.419 54.3312V63.692H182.871V72.3305H191.419V99.0585C191.419 102.43 191.419 114.861 205.114 114.861C208.988 115.003 212.812 113.952 216.07 111.851V103.965H215.859C213.598 105.225 211.072 105.936 208.485 106.041C203.218 106.041 201.953 102.279 201.953 94.9951V72.3305ZM142.031 100.5V109.53C147.611 113.051 154.065 114.938 160.663 114.978C171.107 114.978 179.324 109.59 179.324 99.597C179.324 89.4421 170.197 86.5376 162.598 84.1193C157.179 82.3946 152.536 80.9172 152.536 77.2334C152.536 73.6516 156.028 71.2136 161.807 71.2136C167.043 71.2212 172.142 72.8859 176.375 75.9692H176.585V66.9395C172.007 64.0811 166.722 62.5591 161.325 62.545C150.188 62.545 142.814 68.5648 142.814 77.9257C142.814 87.5253 151.332 90.2816 158.735 92.6769C164.423 94.5174 169.452 96.1448 169.452 100.5C169.452 104.292 165.569 106.7 159.549 106.7C153.327 106.458 147.332 104.292 142.393 100.5H142.031ZM266.552 79.4936V113.746H258.425L257.492 106.071C255.696 108.954 253.167 111.308 250.163 112.895C247.16 114.481 243.79 115.242 240.396 115.101C230.915 115.101 222.066 109.623 222.066 99.5095C222.066 87.801 232.6 84.1289 244.188 84.1289C251.894 84.1289 256.228 83.7075 256.228 78.7412C256.228 73.7748 251.773 71.6077 244.73 71.6077C237.667 71.573 230.852 74.2068 225.647 78.982H225.437L225.768 69.5007C231.407 65.0923 238.299 62.5844 245.453 62.3371C255.897 62.2168 266.552 66.551 266.552 79.4936ZM256.77 93.6402V88.3729C254.422 91.3828 249.697 92.045 243.466 92.045C237.236 92.045 232.42 94.3626 232.42 99.4193C232.42 104.476 237.567 106.794 242.623 106.794C244.427 106.908 246.236 106.654 247.938 106.046C249.641 105.439 251.202 104.49 252.526 103.26C253.849 102.029 254.909 100.541 255.638 98.8869C256.368 97.2331 256.753 95.4478 256.77 93.6402ZM324.577 63.6931H316.481L315.307 72.2412C313.561 69.1151 310.983 66.5344 307.859 64.7861C304.734 63.0379 301.186 62.1906 297.609 62.3386C284.756 62.3386 273.891 72.3315 273.891 87.6218C273.891 103.424 284.425 112.905 296.856 112.905C300.191 113.01 303.501 112.297 306.497 110.828C309.493 109.359 312.084 107.178 314.043 104.477V108.481C314.043 118.925 307.722 124.614 298.451 124.614C290.792 124.634 283.357 122.032 277.382 117.239H277.171V126.811C283.552 
131.553 291.347 134.003 299.294 133.764C314.253 133.764 324.577 124.704 324.577 106.494V63.6931ZM299.806 71.3984C309.287 71.3984 314.584 79.2844 314.584 87.5014C314.584 96.0496 309.287 103.845 299.806 103.845C289.602 103.845 284.756 95.9292 284.756 87.5014C284.756 79.0737 289.271 71.3984 299.806 71.3984ZM348.753 91.8308C345.705 91.8308 343.327 92.2803 343.327 95.3068C343.327 100.461 349.606 105.795 359.329 105.795C362.879 105.975 366.43 105.446 369.766 104.241C373.103 103.036 376.157 101.179 378.745 98.7828H378.958V107.773C373.384 112.554 366.207 115.138 358.811 115.024C343.023 115.024 332.02 104.416 332.02 88.5645C331.884 85.1244 332.457 81.6929 333.705 78.4762C334.953 75.2596 336.85 72.3244 339.282 69.8471C341.713 67.3697 344.629 65.4014 347.854 64.0606C351.08 62.7198 354.547 62.034 358.049 62.0447C363.658 61.8285 369.142 63.7075 373.4 67.3041C377.658 70.9007 380.373 75.9484 381 81.4326V91.8308H348.753ZM350.246 84.1895H370.424C370.49 82.458 370.188 80.7321 369.538 79.1217C368.887 77.5114 367.902 76.052 366.646 74.8367C365.39 73.6214 363.89 72.677 362.242 72.0636C360.594 71.4502 358.833 71.1813 357.074 71.2742C347.93 71.2742 343.114 77.567 342.474 86.4968C344.704 84.8454 347.459 84.0275 350.246 84.1895Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M487.652 28L486.017 33.7297H497.393L499.014 28H487.652Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M471.995 38.027L470.346 43.7568H494.549L496.17 38.027H471.995Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M457.548 48.054L455.898 53.7838H491.705L493.326 48.054H457.548Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M413.494 58.0811L411.844 63.8108H488.861L490.482 58.0811H413.494Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M418.03 68.1081L416.38 73.8378H486.017L487.638 68.1081H418.03Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M420.348 134L421.997 128.27H410.607L409 134H420.348Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M436.019 123.973L437.668 118.243H413.451L411.844 123.973H436.019Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M450.466 113.946L452.116 108.216H416.295L414.688 113.946H450.466Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M494.521 103.919L496.17 98.1891H419.139L417.532 103.919H494.521Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M489.97 93.8919L491.62 88.1622H421.983L420.376 93.8919H489.97Z" fill="#805CFB"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M483.372 83.8649L485.021 78.1351H419.679L418.073 83.8649H483.372Z" fill="#805CFB"/>
</svg>


View File

@ -0,0 +1,3 @@
<svg width="137" height="163" viewBox="0 0 137 163" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M104.652 29.325L103.017 35.0547H114.393L116.014 29.325H104.652ZM88.9956 39.352L87.346 45.0817H111.549L113.17 39.352H88.9956ZM72.8984 55.1088L74.5479 49.379H110.326L108.705 55.1088H72.8984ZM30.4937 59.4061L28.8442 65.1358H105.861L107.482 59.4061H30.4937ZM33.3802 75.1628L35.0298 69.4331H104.638L103.017 75.1628H33.3802ZM37.3478 135.325L38.9973 129.595H27.6069L26 135.325H37.3478ZM54.6682 119.568L53.0186 125.298H28.8442L30.4511 119.568H54.6682ZM67.4662 115.271L69.1157 109.541H33.2949L31.688 115.271H67.4662ZM113.17 99.5142L111.521 105.244H34.5322L36.1391 99.5142H113.17ZM106.97 95.2169L108.62 89.4871H38.9832L37.3763 95.2169H106.97ZM102.021 79.4601L100.372 85.1898H35.0724L36.6793 79.4601H102.021Z" fill="#805CFB"/>
</svg>


View File

@ -0,0 +1,57 @@
from collections.abc import Mapping
import openai
from httpx import Timeout
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
class _CommonUpstage:
def _to_credential_kwargs(self, credentials: Mapping) -> dict:
"""
Transform credentials to kwargs for model instance
:param credentials:
:return:
"""
credentials_kwargs = {
"api_key": credentials['upstage_api_key'],
"base_url": "https://api.upstage.ai/v1/solar",
"timeout": Timeout(315.0, read=300.0, write=20.0, connect=10.0),
"max_retries": 1
}
return credentials_kwargs
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
The key is the error type thrown to the caller
The value is the error type thrown by the model,
which needs to be converted into a unified error type for the caller.
:return: Invoke error mapping
"""
return {
InvokeConnectionError: [openai.APIConnectionError, openai.APITimeoutError],
InvokeServerUnavailableError: [openai.InternalServerError],
InvokeRateLimitError: [openai.RateLimitError],
InvokeAuthorizationError: [openai.AuthenticationError, openai.PermissionDeniedError],
InvokeBadRequestError: [
openai.BadRequestError,
openai.NotFoundError,
openai.UnprocessableEntityError,
openai.APIError,
],
}
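
A minimal sketch of how a caller could apply this mapping — not part of this change; the helper name is hypothetical and it assumes the InvokeError subclasses accept a message string:

def map_to_invoke_error(mapping: dict[type[InvokeError], list[type[Exception]]], error: Exception) -> InvokeError:
    # Walk the unified-error mapping and wrap the raised SDK exception.
    for invoke_error_cls, sdk_error_classes in mapping.items():
        if isinstance(error, tuple(sdk_error_classes)):
            return invoke_error_cls(str(error))
    # Fall back to the generic invoke error when nothing matches.
    return InvokeError(str(error))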

View File

@ -0,0 +1 @@
- solar-1-mini-chat

View File

@ -0,0 +1,575 @@
import logging
from collections.abc import Generator
from typing import Optional, Union, cast
from openai import OpenAI, Stream
from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageToolCall
from openai.types.chat.chat_completion_chunk import ChoiceDeltaFunctionCall, ChoiceDeltaToolCall
from openai.types.chat.chat_completion_message import FunctionCall
from tokenizers import Tokenizer
from core.model_runtime.callbacks.base_callback import Callback
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
ImagePromptMessageContent,
PromptMessage,
PromptMessageContentType,
PromptMessageTool,
SystemPromptMessage,
TextPromptMessageContent,
ToolPromptMessage,
UserPromptMessage,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.upstage._common import _CommonUpstage
logger = logging.getLogger(__name__)
UPSTAGE_BLOCK_MODE_PROMPT = """You should always follow the instructions and output a valid {{block}} object.
The structure of the {{block}} object can be found in the instructions; use {"answer": "$your_answer"} as the default structure
if you are not sure about the structure.
<instructions>
{{instructions}}
</instructions>
"""
class UpstageLargeLanguageModel(_CommonUpstage, LargeLanguageModel):
"""
Model class for Upstage large language model.
"""
def _invoke(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
"""
Invoke large language model
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param model_parameters: model parameters
:param tools: tools for tool calling
:param stop: stop words
:param stream: is stream response
:param user: unique user id
:return: full response or stream response chunk generator result
"""
return self._chat_generate(
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user
)
def _code_block_mode_wrapper(self,
model: str, credentials: dict, prompt_messages: list[PromptMessage], model_parameters: dict, tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None, stream: bool = True, user: Optional[str] = None, callbacks: Optional[list[Callback]] = None) -> Union[LLMResult, Generator]:
"""
Code block mode wrapper for invoking large language model
"""
if 'response_format' in model_parameters and model_parameters['response_format'] in ['JSON', 'XML']:
stop = stop or []
self._transform_chat_json_prompts(
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user,
response_format=model_parameters['response_format']
)
model_parameters.pop('response_format')
return self._invoke(
model=model,
credentials=credentials,
prompt_messages=prompt_messages,
model_parameters=model_parameters,
tools=tools,
stop=stop,
stream=stream,
user=user
)
def _transform_chat_json_prompts(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: list[PromptMessageTool] | None = None, stop: list[str] | None = None,
stream: bool = True, user: str | None = None, response_format: str = 'JSON') -> None:
"""
Transform json prompts
"""
if stop is None:
stop = []
if "```\n" not in stop:
stop.append("```\n")
if "\n```" not in stop:
stop.append("\n```")
if len(prompt_messages) > 0 and isinstance(prompt_messages[0], SystemPromptMessage):
prompt_messages[0] = SystemPromptMessage(
content=UPSTAGE_BLOCK_MODE_PROMPT
.replace("{{instructions}}", prompt_messages[0].content)
.replace("{{block}}", response_format)
)
prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}\n"))
else:
prompt_messages.insert(0, SystemPromptMessage(
content=UPSTAGE_BLOCK_MODE_PROMPT
.replace("{{instructions}}", f"Please output a valid {response_format} object.")
.replace("{{block}}", response_format)
))
prompt_messages.append(AssistantPromptMessage(content=f"\n```{response_format}"))
def get_num_tokens(self, model: str, credentials: dict, prompt_messages: list[PromptMessage], tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param prompt_messages: prompt messages
:param tools: tools for tool calling
:return:
"""
return self._num_tokens_from_messages(model, prompt_messages, tools)
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
client.chat.completions.create(
messages=[{"role": "user", "content": "ping"}],
model=model,
temperature=0,
max_tokens=10,
stream=False
)
except Exception as e:
raise CredentialsValidateFailedError(str(e))
def _chat_generate(self, model: str, credentials: dict,
prompt_messages: list[PromptMessage], model_parameters: dict,
tools: Optional[list[PromptMessageTool]] = None, stop: Optional[list[str]] = None,
stream: bool = True, user: Optional[str] = None) -> Union[LLMResult, Generator]:
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
extra_model_kwargs = {}
if tools:
extra_model_kwargs["functions"] = [{
"name": tool.name,
"description": tool.description,
"parameters": tool.parameters
} for tool in tools]
if stop:
extra_model_kwargs["stop"] = stop
if user:
extra_model_kwargs["user"] = user
# chat model
response = client.chat.completions.create(
messages=[self._convert_prompt_message_to_dict(m) for m in prompt_messages],
model=model,
stream=stream,
**model_parameters,
**extra_model_kwargs,
)
if stream:
return self._handle_chat_generate_stream_response(model, credentials, response, prompt_messages, tools)
return self._handle_chat_generate_response(model, credentials, response, prompt_messages, tools)
def _handle_chat_generate_response(self, model: str, credentials: dict, response: ChatCompletion,
prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> LLMResult:
"""
Handle llm chat response
:param model: model name
:param credentials: credentials
:param response: response
:param prompt_messages: prompt messages
:param tools: tools for tool calling
:return: llm response
"""
assistant_message = response.choices[0].message
# assistant_message_tool_calls = assistant_message.tool_calls
assistant_message_function_call = assistant_message.function_call
# extract tool calls from response
# tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
function_call = self._extract_response_function_call(assistant_message_function_call)
tool_calls = [function_call] if function_call else []
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=assistant_message.content,
tool_calls=tool_calls
)
# calculate num tokens
if response.usage:
# transform usage
prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens
else:
# calculate num tokens
prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
completion_tokens = self._num_tokens_from_messages(model, [assistant_prompt_message])
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
# transform response
response = LLMResult(
model=response.model,
prompt_messages=prompt_messages,
message=assistant_prompt_message,
usage=usage,
system_fingerprint=response.system_fingerprint,
)
return response
def _handle_chat_generate_stream_response(self, model: str, credentials: dict, response: Stream[ChatCompletionChunk],
prompt_messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> Generator:
"""
Handle llm chat stream response
:param model: model name
:param response: response
:param prompt_messages: prompt messages
:param tools: tools for tool calling
:return: llm response chunk generator
"""
full_assistant_content = ''
delta_assistant_message_function_call_storage: Optional[ChoiceDeltaFunctionCall] = None
prompt_tokens = 0
completion_tokens = 0
final_tool_calls = []
final_chunk = LLMResultChunk(
model=model,
prompt_messages=prompt_messages,
delta=LLMResultChunkDelta(
index=0,
message=AssistantPromptMessage(content=''),
)
)
for chunk in response:
if len(chunk.choices) == 0:
if chunk.usage:
# calculate num tokens
prompt_tokens = chunk.usage.prompt_tokens
completion_tokens = chunk.usage.completion_tokens
continue
delta = chunk.choices[0]
has_finish_reason = delta.finish_reason is not None
if not has_finish_reason and (delta.delta.content is None or delta.delta.content == '') and \
delta.delta.function_call is None:
continue
# assistant_message_tool_calls = delta.delta.tool_calls
assistant_message_function_call = delta.delta.function_call
# extract tool calls from response
if delta_assistant_message_function_call_storage is not None:
# handle process of stream function call
if assistant_message_function_call:
# message has not ended yet
delta_assistant_message_function_call_storage.arguments += assistant_message_function_call.arguments
continue
else:
# message has ended
assistant_message_function_call = delta_assistant_message_function_call_storage
delta_assistant_message_function_call_storage = None
else:
if assistant_message_function_call:
# start of stream function call
delta_assistant_message_function_call_storage = assistant_message_function_call
if delta_assistant_message_function_call_storage.arguments is None:
delta_assistant_message_function_call_storage.arguments = ''
if not has_finish_reason:
continue
# tool_calls = self._extract_response_tool_calls(assistant_message_tool_calls)
function_call = self._extract_response_function_call(assistant_message_function_call)
tool_calls = [function_call] if function_call else []
if tool_calls:
final_tool_calls.extend(tool_calls)
# transform assistant message to prompt message
assistant_prompt_message = AssistantPromptMessage(
content=delta.delta.content if delta.delta.content else '',
tool_calls=tool_calls
)
full_assistant_content += delta.delta.content if delta.delta.content else ''
if has_finish_reason:
final_chunk = LLMResultChunk(
model=chunk.model,
prompt_messages=prompt_messages,
system_fingerprint=chunk.system_fingerprint,
delta=LLMResultChunkDelta(
index=delta.index,
message=assistant_prompt_message,
finish_reason=delta.finish_reason,
)
)
else:
yield LLMResultChunk(
model=chunk.model,
prompt_messages=prompt_messages,
system_fingerprint=chunk.system_fingerprint,
delta=LLMResultChunkDelta(
index=delta.index,
message=assistant_prompt_message,
)
)
if not prompt_tokens:
prompt_tokens = self._num_tokens_from_messages(model, prompt_messages, tools)
if not completion_tokens:
full_assistant_prompt_message = AssistantPromptMessage(
content=full_assistant_content,
tool_calls=final_tool_calls
)
completion_tokens = self._num_tokens_from_messages(model, [full_assistant_prompt_message])
# transform usage
usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
final_chunk.delta.usage = usage
yield final_chunk
def _extract_response_tool_calls(self,
response_tool_calls: list[ChatCompletionMessageToolCall | ChoiceDeltaToolCall]) \
-> list[AssistantPromptMessage.ToolCall]:
"""
Extract tool calls from response
:param response_tool_calls: response tool calls
:return: list of tool calls
"""
tool_calls = []
if response_tool_calls:
for response_tool_call in response_tool_calls:
function = AssistantPromptMessage.ToolCall.ToolCallFunction(
name=response_tool_call.function.name,
arguments=response_tool_call.function.arguments
)
tool_call = AssistantPromptMessage.ToolCall(
id=response_tool_call.id,
type=response_tool_call.type,
function=function
)
tool_calls.append(tool_call)
return tool_calls
def _extract_response_function_call(self, response_function_call: FunctionCall | ChoiceDeltaFunctionCall) \
-> AssistantPromptMessage.ToolCall:
"""
Extract function call from response
:param response_function_call: response function call
:return: tool call
"""
tool_call = None
if response_function_call:
function = AssistantPromptMessage.ToolCall.ToolCallFunction(
name=response_function_call.name,
arguments=response_function_call.arguments
)
tool_call = AssistantPromptMessage.ToolCall(
id=response_function_call.name,
type="function",
function=function
)
return tool_call
def _convert_prompt_message_to_dict(self, message: PromptMessage) -> dict:
"""
Convert PromptMessage to dict for Upstage API
"""
if isinstance(message, UserPromptMessage):
message = cast(UserPromptMessage, message)
if isinstance(message.content, str):
message_dict = {"role": "user", "content": message.content}
else:
sub_messages = []
for message_content in message.content:
if message_content.type == PromptMessageContentType.TEXT:
message_content = cast(TextPromptMessageContent, message_content)
sub_message_dict = {
"type": "text",
"text": message_content.data
}
sub_messages.append(sub_message_dict)
elif message_content.type == PromptMessageContentType.IMAGE:
message_content = cast(ImagePromptMessageContent, message_content)
sub_message_dict = {
"type": "image_url",
"image_url": {
"url": message_content.data,
"detail": message_content.detail.value
}
}
sub_messages.append(sub_message_dict)
message_dict = {"role": "user", "content": sub_messages}
elif isinstance(message, AssistantPromptMessage):
message = cast(AssistantPromptMessage, message)
message_dict = {"role": "assistant", "content": message.content}
if message.tool_calls:
# message_dict["tool_calls"] = [tool_call.dict() for tool_call in
# message.tool_calls]
function_call = message.tool_calls[0]
message_dict["function_call"] = {
"name": function_call.function.name,
"arguments": function_call.function.arguments,
}
elif isinstance(message, SystemPromptMessage):
message = cast(SystemPromptMessage, message)
message_dict = {"role": "system", "content": message.content}
elif isinstance(message, ToolPromptMessage):
message = cast(ToolPromptMessage, message)
# message_dict = {
# "role": "tool",
# "content": message.content,
# "tool_call_id": message.tool_call_id
# }
message_dict = {
"role": "function",
"content": message.content,
"name": message.tool_call_id
}
else:
raise ValueError(f"Got unknown type {message}")
if message.name:
message_dict["name"] = message.name
return message_dict
def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
def _num_tokens_from_messages(self, model: str, messages: list[PromptMessage],
tools: Optional[list[PromptMessageTool]] = None) -> int:
"""
Calculate num tokens for Solar with the Hugging Face Solar tokenizer.
The Solar tokenizer is published on Hugging Face: https://huggingface.co/upstage/solar-1-mini-tokenizer
"""
tokenizer = self._get_tokenizer()
tokens_per_message = 5 # <|im_start|>{role}\n{message}<|im_end|>
tokens_prefix = 1 # <|startoftext|>
tokens_suffix = 3 # <|im_start|>assistant\n
num_tokens = 0
num_tokens += tokens_prefix
messages_dict = [self._convert_prompt_message_to_dict(message) for message in messages]
for message in messages_dict:
num_tokens += tokens_per_message
for key, value in message.items():
if isinstance(value, list):
text = ''
for item in value:
if isinstance(item, dict) and item['type'] == 'text':
text += item['text']
value = text
if key == "tool_calls":
for tool_call in value:
for t_key, t_value in tool_call.items():
num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
if t_key == "function":
for f_key, f_value in t_value.items():
num_tokens += len(tokenizer.encode(f_key, add_special_tokens=False))
num_tokens += len(tokenizer.encode(f_value, add_special_tokens=False))
else:
num_tokens += len(tokenizer.encode(t_key, add_special_tokens=False))
num_tokens += len(tokenizer.encode(t_value, add_special_tokens=False))
else:
num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))
num_tokens += tokens_suffix
if tools:
num_tokens += self._num_tokens_for_tools(tokenizer, tools)
return num_tokens
def _num_tokens_for_tools(self, tokenizer: Tokenizer, tools: list[PromptMessageTool]) -> int:
"""
Calculate num tokens for tool calling with the Upstage tokenizer.
:param tokenizer: huggingface tokenizer
:param tools: tools for tool calling
:return: number of tokens
"""
num_tokens = 0
for tool in tools:
num_tokens += len(tokenizer.encode('type'))
num_tokens += len(tokenizer.encode('function'))
# calculate num tokens for function object
num_tokens += len(tokenizer.encode('name'))
num_tokens += len(tokenizer.encode(tool.name))
num_tokens += len(tokenizer.encode('description'))
num_tokens += len(tokenizer.encode(tool.description))
parameters = tool.parameters
num_tokens += len(tokenizer.encode('parameters'))
if 'title' in parameters:
num_tokens += len(tokenizer.encode('title'))
num_tokens += len(tokenizer.encode(parameters.get("title")))
num_tokens += len(tokenizer.encode('type'))
num_tokens += len(tokenizer.encode(parameters.get("type")))
if 'properties' in parameters:
num_tokens += len(tokenizer.encode('properties'))
for key, value in parameters.get('properties').items():
num_tokens += len(tokenizer.encode(key))
for field_key, field_value in value.items():
num_tokens += len(tokenizer.encode(field_key))
if field_key == 'enum':
for enum_field in field_value:
num_tokens += 3
num_tokens += len(tokenizer.encode(enum_field))
else:
num_tokens += len(tokenizer.encode(field_key))
num_tokens += len(tokenizer.encode(str(field_value)))
if 'required' in parameters:
num_tokens += len(tokenizer.encode('required'))
for required_field in parameters['required']:
num_tokens += 3
num_tokens += len(tokenizer.encode(required_field))
return num_tokens
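
As a rough illustration of the counting scheme above — a sketch, not part of this change; it assumes the tokenizers package can download upstage/solar-1-mini-tokenizer from Hugging Face — a single user message is counted as one prefix token, five per-message tokens, the encoded role and content, and three suffix tokens:

from tokenizers import Tokenizer

tokenizer = Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
message = {"role": "user", "content": "Hello World!"}
num_tokens = 1                   # tokens_prefix: <|startoftext|>
num_tokens += 5                  # tokens_per_message: <|im_start|>{role}\n{message}<|im_end|>
for value in message.values():   # both the role and the content are encoded
    num_tokens += len(tokenizer.encode(str(value), add_special_tokens=False))
num_tokens += 3                  # tokens_suffix: <|im_start|>assistant\n
print(num_tokens)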

View File

@ -0,0 +1,43 @@
model: solar-1-mini-chat
label:
zh_Hans: solar-1-mini-chat
en_US: solar-1-mini-chat
ko_KR: solar-1-mini-chat
model_type: llm
features:
- multi-tool-call
- agent-thought
- stream-tool-call
model_properties:
mode: chat
context_size: 32768
parameter_rules:
- name: temperature
use_template: temperature
- name: top_p
use_template: top_p
- name: max_tokens
use_template: max_tokens
default: 512
min: 1
max: 32768
- name: seed
label:
zh_Hans: 种子
en_US: Seed
type: int
help:
zh_Hans:
如果指定,模型将尽最大努力进行确定性采样,使得重复的具有相同种子和参数的请求应该返回相同的结果。不能保证确定性,您应该参考 system_fingerprint
响应参数来监视变化。
en_US:
If specified, model will make a best effort to sample deterministically,
such that repeated requests with the same seed and parameters should return
the same result. Determinism is not guaranteed, and you should refer to the
system_fingerprint response parameter to monitor changes in the backend.
required: false
pricing:
input: "0.5"
output: "0.5"
unit: "0.000001"
currency: USD
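
These parameter rules line up with the OpenAI-compatible chat endpoint configured in _common.py; a hedged sketch of a direct call — not part of this change, assuming the openai v1 SDK and the UPSTAGE_API_KEY environment variable used by the tests below:

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.environ["UPSTAGE_API_KEY"],
    base_url="https://api.upstage.ai/v1/solar",
)
response = client.chat.completions.create(
    model="solar-1-mini-chat",
    messages=[{"role": "user", "content": "Hello World!"}],
    temperature=0.0,
    top_p=1.0,
    max_tokens=512,  # default from the parameter rules above
    seed=42,         # best-effort deterministic sampling, per the seed rule above
)
print(response.choices[0].message.content)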

View File

@ -0,0 +1,9 @@
model: solar-embedding-1-large-passage
model_type: text-embedding
model_properties:
context_size: 4000
max_chunks: 32
pricing:
input: '0.1'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,9 @@
model: solar-embedding-1-large-query
model_type: text-embedding
model_properties:
context_size: 4000
max_chunks: 32
pricing:
input: '0.1'
unit: '0.000001'
currency: 'USD'

View File

@ -0,0 +1,195 @@
import base64
import time
from collections.abc import Mapping
from typing import Union
import numpy as np
from openai import OpenAI
from tokenizers import Tokenizer
from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
from core.model_runtime.model_providers.upstage._common import _CommonUpstage
class UpstageTextEmbeddingModel(_CommonUpstage, TextEmbeddingModel):
"""
Model class for Upstage text embedding model.
"""
def _get_tokenizer(self) -> Tokenizer:
return Tokenizer.from_pretrained("upstage/solar-1-mini-tokenizer")
def _invoke(self, model: str, credentials: dict, texts: list[str], user: str | None = None) -> TextEmbeddingResult:
"""
Invoke text embedding model
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:param user: unique user id
:return: embeddings result
"""
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
extra_model_kwargs = {}
if user:
extra_model_kwargs["user"] = user
extra_model_kwargs["encoding_format"] = "base64"
context_size = self._get_context_size(model, credentials)
max_chunks = self._get_max_chunks(model, credentials)
embeddings: list[list[float]] = [[] for _ in range(len(texts))]
tokens = []
indices = []
used_tokens = 0
tokenizer = self._get_tokenizer()
for i, text in enumerate(texts):
token = tokenizer.encode(text, add_special_tokens=False).tokens
for j in range(0, len(token), context_size):
tokens += [token[j:j+context_size]]
indices += [i]
batched_embeddings = []
_iter = range(0, len(tokens), max_chunks)
for i in _iter:
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
texts=tokens[i:i+max_chunks],
extra_model_kwargs=extra_model_kwargs,
)
used_tokens += embedding_used_tokens
batched_embeddings += embeddings_batch
results: list[list[list[float]]] = [[] for _ in range(len(texts))]
num_tokens_in_batch: list[list[int]] = [[] for _ in range(len(texts))]
for i in range(len(indices)):
results[indices[i]].append(batched_embeddings[i])
num_tokens_in_batch[indices[i]].append(len(tokens[i]))
for i in range(len(texts)):
_result = results[i]
if len(_result) == 0:
embeddings_batch, embedding_used_tokens = self._embedding_invoke(
model=model,
client=client,
texts=[texts[i]],
extra_model_kwargs=extra_model_kwargs,
)
used_tokens += embedding_used_tokens
average = embeddings_batch[0]
else:
average = np.average(_result, axis=0, weights=num_tokens_in_batch[i])
embeddings[i] = (average / np.linalg.norm(average)).tolist()
usage = self._calc_response_usage(
model=model,
credentials=credentials,
tokens=used_tokens
)
return TextEmbeddingResult(embeddings=embeddings, usage=usage, model=model)
def get_num_tokens(self, model: str, credentials: dict, texts: list[str]) -> int:
"""
Get number of tokens for given prompt messages
:param model: model name
:param credentials: model credentials
:param texts: texts to embed
:return:
"""
if len(texts) == 0:
return 0
tokenizer = self._get_tokenizer()
total_num_tokens = 0
for text in texts:
# calculate the number of tokens in the encoded text
tokenized_text = tokenizer.encode(text)
total_num_tokens += len(tokenized_text)
return total_num_tokens
def validate_credentials(self, model: str, credentials: Mapping) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
client = OpenAI(**credentials_kwargs)
# call embedding model
self._embedding_invoke(
model=model,
client=client,
texts=['ping'],
extra_model_kwargs={}
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
def _embedding_invoke(self, model: str, client: OpenAI, texts: Union[list[str], str], extra_model_kwargs: dict) -> tuple[list[list[float]], int]:
"""
Invoke embedding model
:param model: model name
:param client: model client
:param texts: texts to embed
:param extra_model_kwargs: extra model kwargs
:return: embeddings and used tokens
"""
response = client.embeddings.create(
model=model,
input=texts,
**extra_model_kwargs
)
if 'encoding_format' in extra_model_kwargs and extra_model_kwargs['encoding_format'] == 'base64':
return ([list(np.frombuffer(base64.b64decode(embedding.embedding), dtype=np.float32)) for embedding in response.data], response.usage.total_tokens)
return [data.embedding for data in response.data], response.usage.total_tokens
def _calc_response_usage(self, model: str, credentials: dict, tokens: int) -> EmbeddingUsage:
"""
Calculate response usage
:param model: model name
:param credentials: model credentials
:param tokens: input tokens
:return: usage
"""
input_price_info = self.get_price(
model=model,
credentials=credentials,
tokens=tokens,
price_type=PriceType.INPUT
)
usage = EmbeddingUsage(
tokens=tokens,
total_tokens=tokens,
unit_price=input_price_info.unit_price,
price_unit=input_price_info.unit,
total_price=input_price_info.total_amount,
currency=input_price_info.currency,
latency=time.perf_counter() - self.started_at
)
return usage
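
The splitting-and-averaging logic in _invoke above boils down to: tokenize each text, slice it into context_size windows, embed the windows in batches of max_chunks, then recombine each text's window embeddings as a token-count-weighted average normalized to unit length. A stripped-down sketch of that recombination step — not part of this change, and the function name is hypothetical:

import numpy as np

def combine_chunk_embeddings(chunk_embeddings: list[list[float]], chunk_token_counts: list[int]) -> list[float]:
    # Weighted average of the per-chunk vectors; weights are the token counts of each chunk.
    average = np.average(chunk_embeddings, axis=0, weights=chunk_token_counts)
    # L2-normalize so texts of different lengths produce unit-length embeddings.
    return (average / np.linalg.norm(average)).tolist()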

View File

@ -0,0 +1,32 @@
import logging
from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.model_provider import ModelProvider
logger = logging.getLogger(__name__)
class UpstageProvider(ModelProvider):
def validate_provider_credentials(self, credentials: dict) -> None:
"""
Validate provider credentials
if validation fails, raise an exception
:param credentials: provider credentials, as defined in `provider_credential_schema`.
"""
try:
model_instance = self.get_model_instance(ModelType.LLM)
model_instance.validate_credentials(
model="solar-1-mini-chat",
credentials=credentials
)
except CredentialsValidateFailedError as e:
logger.exception(f'{self.get_provider_schema().provider} credentials validation failed')
raise e
except Exception as e:
logger.exception(f'{self.get_provider_schema().provider} credentials validation failed')
raise e

View File

@ -0,0 +1,49 @@
provider: upstage
label:
en_US: Upstage
description:
en_US: Models provided by Upstage, such as Solar-1-mini-chat.
zh_Hans: Upstage 提供的模型,例如 Solar-1-mini-chat.
icon_small:
en_US: icon_s_en.svg
icon_large:
en_US: icon_l_en.svg
background: "#FFFFFF"
help:
title:
en_US: Get your API Key from Upstage
zh_Hans: 从 Upstage 获取 API Key
url:
en_US: https://console.upstage.ai/api-keys
supported_model_types:
- llm
- text-embedding
configurate_methods:
- predefined-model
model_credential_schema:
model:
label:
en_US: Model Name
zh_Hans: 模型名称
placeholder:
en_US: Enter your model name
zh_Hans: 输入模型名称
credential_form_schemas:
- variable: upstage_api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key
provider_credential_schema:
credential_form_schemas:
- variable: upstage_api_key
label:
en_US: API Key
type: secret-input
required: true
placeholder:
zh_Hans: 在此输入您的 API Key
en_US: Enter your API Key

View File

@ -4,7 +4,7 @@ set -e
if [[ "${MIGRATION_ENABLED}" == "true" ]]; then
echo "Running migrations"
flask upgrade-db
flask db upgrade
fi
if [[ "${MODE}" == "worker" ]]; then

View File

@ -73,6 +73,7 @@ quote-style = "single"
[tool.pytest_env]
OPENAI_API_KEY = "sk-IamNotARealKeyJustForMockTestKawaiiiiiiiiii"
UPSTAGE_API_KEY = "up-aaaaaaaaaaaaaaaaaaaa"
AZURE_OPENAI_API_BASE = "https://difyai-openai.openai.azure.com"
AZURE_OPENAI_API_KEY = "xxxxb1707exxxxxxxxxxaaxxxxxf94"
ANTHROPIC_API_KEY = "sk-ant-api11-IamNotARealKeyJustForMockTestKawaiiiiiiiiii-NotBaka-ASkksz"

View File

@ -0,0 +1,245 @@
import os
from collections.abc import Generator
import pytest
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
PromptMessageTool,
SystemPromptMessage,
UserPromptMessage,
)
from core.model_runtime.entities.model_entities import AIModelEntity, ModelType
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.model_runtime.model_providers.upstage.llm.llm import UpstageLargeLanguageModel
"""FOR MOCK FIXTURES, DO NOT REMOVE"""
from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
def test_predefined_models():
model = UpstageLargeLanguageModel()
model_schemas = model.predefined_models()
assert len(model_schemas) >= 1
assert isinstance(model_schemas[0], AIModelEntity)
@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
def test_validate_credentials_for_chat_model(setup_openai_mock):
model = UpstageLargeLanguageModel()
with pytest.raises(CredentialsValidateFailedError):
# model name is set to gpt-3.5-turbo because of mocking
model.validate_credentials(
model='gpt-3.5-turbo',
credentials={
'upstage_api_key': 'invalid_key'
}
)
model.validate_credentials(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
}
)
@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
def test_invoke_chat_model(setup_openai_mock):
model = UpstageLargeLanguageModel()
result = model.invoke(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
},
prompt_messages=[
SystemPromptMessage(
content='You are a helpful AI assistant.',
),
UserPromptMessage(
content='Hello World!'
)
],
model_parameters={
'temperature': 0.0,
'top_p': 1.0,
'presence_penalty': 0.0,
'frequency_penalty': 0.0,
'max_tokens': 10
},
stop=['How'],
stream=False,
user="abc-123"
)
assert isinstance(result, LLMResult)
assert len(result.message.content) > 0
@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
def test_invoke_chat_model_with_tools(setup_openai_mock):
model = UpstageLargeLanguageModel()
result = model.invoke(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
},
prompt_messages=[
SystemPromptMessage(
content='You are a helpful AI assistant.',
),
UserPromptMessage(
content="what's the weather today in London?",
)
],
model_parameters={
'temperature': 0.0,
'max_tokens': 100
},
tools=[
PromptMessageTool(
name='get_weather',
description='Determine weather in my location',
parameters={
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state e.g. San Francisco, CA"
},
"unit": {
"type": "string",
"enum": [
"c",
"f"
]
}
},
"required": [
"location"
]
}
),
PromptMessageTool(
name='get_stock_price',
description='Get the current stock price',
parameters={
"type": "object",
"properties": {
"symbol": {
"type": "string",
"description": "The stock symbol"
}
},
"required": [
"symbol"
]
}
)
],
stream=False,
user="abc-123"
)
assert isinstance(result, LLMResult)
assert isinstance(result.message, AssistantPromptMessage)
assert len(result.message.tool_calls) > 0
@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
def test_invoke_stream_chat_model(setup_openai_mock):
model = UpstageLargeLanguageModel()
result = model.invoke(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
},
prompt_messages=[
SystemPromptMessage(
content='You are a helpful AI assistant.',
),
UserPromptMessage(
content='Hello World!'
)
],
model_parameters={
'temperature': 0.0,
'max_tokens': 100
},
stream=True,
user="abc-123"
)
assert isinstance(result, Generator)
for chunk in result:
assert isinstance(chunk, LLMResultChunk)
assert isinstance(chunk.delta, LLMResultChunkDelta)
assert isinstance(chunk.delta.message, AssistantPromptMessage)
assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
if chunk.delta.finish_reason is not None:
assert chunk.delta.usage is not None
assert chunk.delta.usage.completion_tokens > 0
def test_get_num_tokens():
model = UpstageLargeLanguageModel()
num_tokens = model.get_num_tokens(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
},
prompt_messages=[
UserPromptMessage(
content='Hello World!'
)
]
)
assert num_tokens == 13
num_tokens = model.get_num_tokens(
model='solar-1-mini-chat',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
},
prompt_messages=[
SystemPromptMessage(
content='You are a helpful AI assistant.',
),
UserPromptMessage(
content='Hello World!'
)
],
tools=[
PromptMessageTool(
name='get_weather',
description='Determine weather in my location',
parameters={
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state e.g. San Francisco, CA"
},
"unit": {
"type": "string",
"enum": [
"c",
"f"
]
}
},
"required": [
"location"
]
}
),
]
)
assert num_tokens == 106

View File

@ -0,0 +1,23 @@
import os
import pytest
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.upstage.upstage import UpstageProvider
from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
@pytest.mark.parametrize('setup_openai_mock', [['chat']], indirect=True)
def test_validate_provider_credentials(setup_openai_mock):
provider = UpstageProvider()
with pytest.raises(CredentialsValidateFailedError):
provider.validate_provider_credentials(
credentials={}
)
provider.validate_provider_credentials(
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
}
)

View File

@ -0,0 +1,67 @@
import os
import pytest
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.upstage.text_embedding.text_embedding import UpstageTextEmbeddingModel
from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
def test_validate_credentials(setup_openai_mock):
model = UpstageTextEmbeddingModel()
with pytest.raises(CredentialsValidateFailedError):
model.validate_credentials(
model='solar-embedding-1-large-passage',
credentials={
'upstage_api_key': 'invalid_key'
}
)
model.validate_credentials(
model='solar-embedding-1-large-passage',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY')
}
)
@pytest.mark.parametrize('setup_openai_mock', [['text_embedding']], indirect=True)
def test_invoke_model(setup_openai_mock):
model = UpstageTextEmbeddingModel()
result = model.invoke(
model='solar-embedding-1-large-passage',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
},
texts=[
"hello",
"world",
" ".join(["long_text"] * 100),
" ".join(["another_long_text"] * 100)
],
user="abc-123"
)
assert isinstance(result, TextEmbeddingResult)
assert len(result.embeddings) == 4
assert result.usage.total_tokens == 2
def test_get_num_tokens():
model = UpstageTextEmbeddingModel()
num_tokens = model.get_num_tokens(
model='solar-embedding-1-large-passage',
credentials={
'upstage_api_key': os.environ.get('UPSTAGE_API_KEY'),
},
texts=[
"hello",
"world"
]
)
assert num_tokens == 5

View File

@ -5,4 +5,6 @@ pytest api/tests/integration_tests/model_runtime/anthropic \
api/tests/integration_tests/model_runtime/azure_openai \
api/tests/integration_tests/model_runtime/openai api/tests/integration_tests/model_runtime/chatglm \
api/tests/integration_tests/model_runtime/google api/tests/integration_tests/model_runtime/xinference \
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py
api/tests/integration_tests/model_runtime/huggingface_hub/test_llm.py \
api/tests/integration_tests/model_runtime/upstage