feat: add models for gitee.ai (#9490)

2024-11-16 03:32:23 +08:00 · 2024-10-28 16:52:12 +08:00 · 2024-10-28 16:52:12 +08:00 · 0ebd985672
commit 0ebd985672
parent c13dc62065
44 changed files with 1586 additions and 0 deletions
--- a/api/core/model_runtime/model_providers/gitee_ai/_assets/Gitee-AI-Logo-full.svg
+++ b/api/core/model_runtime/model_providers/gitee_ai/_assets/Gitee-AI-Logo-full.svg
--- a/api/core/model_runtime/model_providers/gitee_ai/_assets/Gitee-AI-Logo.svg
+++ b/api/core/model_runtime/model_providers/gitee_ai/_assets/Gitee-AI-Logo.svg
@ -0,0 +1,3 @@
 <svg width="40" height="40" viewBox="0 0 40 40" fill="none" xmlns="http://www.w3.org/2000/svg">
 <path fill-rule="evenodd" clip-rule="evenodd" d="M25.132 24.3947C25.497 25.7527 25.8984 27.1413 26.3334 28.5834C26.7302 29.8992 25.5459 30.4167 25.0752 29.1758C24.571 27.8466 24.0885 26.523 23.6347 25.1729C21.065 26.4654 18.5025 27.5424 15.5961 28.7541C16.7581 33.0256 17.8309 36.5984 19.4952 39.9935C19.4953 39.9936 19.4953 39.9937 19.4954 39.9938C19.6631 39.9979 19.8313 40 20 40C31.0457 40 40 31.0457 40 20C40 16.0335 38.8453 12.3366 36.8537 9.22729C31.6585 9.69534 27.0513 10.4562 22.8185 11.406C22.8882 12.252 22.9677 13.0739 23.0555 13.855C23.3824 16.7604 23.9112 19.5281 24.6137 22.3836C27.0581 21.2848 29.084 20.3225 30.6816 19.522C32.2154 18.7535 33.6943 18.7062 31.2018 20.6594C29.0388 22.1602 27.0644 23.3566 25.132 24.3947ZM36.1559 8.20846C33.0001 3.89184 28.1561 0.887462 22.5955 0.166882C22.4257 2.86234 22.4785 6.26344 22.681 9.50447C26.7473 8.88859 31.1721 8.46032 36.1559 8.20846ZM19.9369 9.73661e-05C19.7594 2.92694 19.8384 6.65663 20.19 9.91293C17.3748 10.4109 14.7225 11.0064 12.1592 11.7038C12.0486 10.4257 11.9927 9.25764 11.9927 8.24178C11.9927 7.5054 11.3957 6.90844 10.6593 6.90844C9.92296 6.90844 9.32601 7.5054 9.32601 8.24178C9.32601 9.47868 9.42873 10.898 9.61402 12.438C8.33567 12.8278 7.07397 13.2443 5.81918 13.688C5.12493 13.9336 4.76118 14.6954 5.0067 15.3896C5.25223 16.0839 6.01406 16.4476 6.7083 16.2021C7.7931 15.8185 8.88482 15.4388 9.98927 15.0659C10.5222 18.3344 11.3344 21.9428 12.2703 25.4156C12.4336 26.0218 12.6062 26.6262 12.7863 27.2263C9.34168 28.4135 5.82612 29.3782 2.61128 29.8879C0.949407 26.9716 0 23.5967 0 20C0 8.97534 8.92023 0.0341108 19.9369 9.73661e-05ZM4.19152 32.2527C7.45069 36.4516 12.3458 39.3173 17.9204 39.8932C16.5916 37.455 14.9338 33.717 13.5405 29.5901C10.4404 30.7762 7.25883 31.6027 4.19152 32.2527ZM22.9735 23.1135C22.1479 20.41 21.4462 17.5441 20.9225 14.277C20.746 13.5841 20.5918 12.8035 20.4593 11.9636C17.6508 12.6606 14.9992 13.4372 12.4356 14.2598C12.8479 17.4766 13.5448 21.1334 14.5118 24.7218C14.662 25.2792 14.8081 
25.8248 14.9514 26.3594L14.9516 26.3603L14.9524 26.3634L14.9526 26.3639L14.973 26.4401C16.1833 25.9872 17.3746 25.5123 18.53 25.0259C20.1235 24.3552 21.6051 23.7165 22.9735 23.1135Z" fill="#141519"/>
 </svg>
--- a/api/core/model_runtime/model_providers/gitee_ai/_common.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/_common.py
@ -0,0 +1,47 @@
 from dashscope.common.error import (
    AuthenticationError,
    InvalidParameter,
    RequestFailure,
    ServiceUnavailableError,
    UnsupportedHTTPMethod,
    UnsupportedModel,
 )
 from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
 )
 class _CommonGiteeAI:
    # Holds the invoke-error mapping used by the Gitee AI provider package.
    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Describe how model-side exceptions translate into unified invoke errors.
        Each key is the unified error type raised to the caller; each value is the
        list of exception types that are converted into that key.
        A new dict (with new lists) is built on every access.
        :return: Invoke error mapping
        """
        # NOTE(review): every source exception here is imported from
        # dashscope.common.error -- confirm these are the exceptions the
        # Gitee AI calls actually raise.
        connection_failures = [RequestFailure]
        unavailable_failures = [ServiceUnavailableError]
        # No source exception is mapped to the rate-limit error.
        rate_limit_failures = []
        authorization_failures = [AuthenticationError]
        bad_request_failures = [InvalidParameter, UnsupportedModel, UnsupportedHTTPMethod]
        mapping = {
            InvokeConnectionError: connection_failures,
            InvokeServerUnavailableError: unavailable_failures,
            InvokeRateLimitError: rate_limit_failures,
            InvokeAuthorizationError: authorization_failures,
            InvokeBadRequestError: bad_request_failures,
        }
        return mapping
--- a/api/core/model_runtime/model_providers/gitee_ai/gitee_ai.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/gitee_ai.py
@ -0,0 +1,25 @@
 import logging
 from core.model_runtime.entities.model_entities import ModelType
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.model_provider import ModelProvider
 logger = logging.getLogger(__name__)
 class GiteeAIProvider(ModelProvider):
    def validate_provider_credentials(self, credentials: dict) -> None:
        """
        Validate provider credentials
        The credentials are checked by asking the LLM model instance to validate
        them against the "Qwen2-7B-Instruct" model.
        if validate failed, raise exception
        :param credentials: provider credentials, credentials form defined in `provider_credential_schema`.
        :raises CredentialsValidateFailedError: re-raised unchanged, without logging
        :raises Exception: any other error is logged with its traceback and re-raised
        """
        try:
            model_instance = self.get_model_instance(ModelType.LLM)
            model_instance.validate_credentials(model="Qwen2-7B-Instruct", credentials=credentials)
        except CredentialsValidateFailedError:
            # Validation failures propagate as-is; a bare `raise` re-raises the
            # active exception without adding a redundant traceback entry.
            raise
        except Exception:
            # Pass the provider name as a lazy %-style argument so the message
            # template stays constant and formatting is left to logging.
            logger.exception("%s credentials validate failed", self.get_provider_schema().provider)
            raise
--- a/api/core/model_runtime/model_providers/gitee_ai/gitee_ai.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/gitee_ai.yaml
@ -0,0 +1,35 @@
 provider: gitee_ai
 label:
  en_US: Gitee AI
  zh_Hans: Gitee AI
 description:
  en_US: Quickly try out large models and stay at the forefront of exploring the open-source AI world
  zh_Hans: 快速体验大模型，领先探索 AI 开源世界
 icon_small:
  en_US: Gitee-AI-Logo.svg
 icon_large:
  en_US: Gitee-AI-Logo-full.svg
 help:
  title:
    en_US: Get your token from Gitee AI
    zh_Hans: 从 Gitee AI 获取 token
  url:
    en_US: https://ai.gitee.com/dashboard/settings/tokens
 supported_model_types:
  - llm
  - text-embedding
  - rerank
  - speech2text
  - tts
 configurate_methods:
  - predefined-model
 provider_credential_schema:
  credential_form_schemas:
    - variable: api_key
      label:
        en_US: API Key
      type: secret-input
      required: true
      placeholder:
        zh_Hans: 在此输入您的 API Key
        en_US: Enter your API Key
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2-72B-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2-72B-Instruct.yaml
@ -0,0 +1,105 @@
 model: Qwen2-72B-Instruct
 label:
  zh_Hans: Qwen2-72B-Instruct
  en_US: Qwen2-72B-Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 6400
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2-7B-Instruct.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/Qwen2-7B-Instruct.yaml
@ -0,0 +1,105 @@
 model: Qwen2-7B-Instruct
 label:
  zh_Hans: Qwen2-7B-Instruct
  en_US: Qwen2-7B-Instruct
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32768
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/Yi-1.5-34B-Chat.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/Yi-1.5-34B-Chat.yaml
@ -0,0 +1,105 @@
 model: Yi-1.5-34B-Chat
 label:
  zh_Hans: Yi-1.5-34B-Chat
  en_US: Yi-1.5-34B-Chat
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 4096
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/_position.yaml
@ -0,0 +1,7 @@
 - Qwen2-7B-Instruct
 - Qwen2-72B-Instruct
 - Yi-1.5-34B-Chat
 - glm-4-9b-chat
 - deepseek-coder-33B-instruct-chat
 - deepseek-coder-33B-instruct-completions
 - codegeex4-all-9b
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/codegeex4-all-9b.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/codegeex4-all-9b.yaml
@ -0,0 +1,105 @@
 model: codegeex4-all-9b
 label:
  zh_Hans: codegeex4-all-9b
  en_US: codegeex4-all-9b
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 40960
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/deepseek-coder-33B-instruct-chat.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/deepseek-coder-33B-instruct-chat.yaml
@ -0,0 +1,105 @@
 model: deepseek-coder-33B-instruct-chat
 label:
  zh_Hans: deepseek-coder-33B-instruct-chat
  en_US: deepseek-coder-33B-instruct-chat
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 9000
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/deepseek-coder-33B-instruct-completions.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/deepseek-coder-33B-instruct-completions.yaml
@ -0,0 +1,91 @@
 model: deepseek-coder-33B-instruct-completions
 label:
  zh_Hans: deepseek-coder-33B-instruct-completions
  en_US: deepseek-coder-33B-instruct-completions
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: completion
  context_size: 9000
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/glm-4-9b-chat.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/glm-4-9b-chat.yaml
@ -0,0 +1,105 @@
 model: glm-4-9b-chat
 label:
  zh_Hans: glm-4-9b-chat
  en_US: glm-4-9b-chat
 model_type: llm
 features:
  - agent-thought
 model_properties:
  mode: chat
  context_size: 32768
 parameter_rules:
  - name: stream
    use_template: boolean
    label:
      en_US: "Stream"
      zh_Hans: "流式"
    type: boolean
    default: true
    required: true
    help:
      en_US: "Whether to return the results in batches through streaming. If set to true, the generated text will be pushed to the user in real time during the generation process."
      zh_Hans: "是否通过流式分批返回结果。如果设置为 true，生成过程中实时地向用户推送每一部分生成的文本。"
  - name: max_tokens
    use_template: max_tokens
    label:
      en_US: "Max Tokens"
      zh_Hans: "最大Token数"
    type: int
    default: 512
    min: 1
    required: true
    help:
      en_US: "The maximum number of tokens that can be generated by the model varies depending on the model."
      zh_Hans: "模型可生成的最大 token 个数，不同模型上限不同。"
  - name: temperature
    use_template: temperature
    label:
      en_US: "Temperature"
      zh_Hans: "采样温度"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The sampling temperature controls the randomness of the output. The temperature value is within the range of [0.0, 1.0]. The higher the value, the more random and creative the output; the lower the value, the more stable it is. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样温度控制输出的随机性。温度值在 [0.0, 1.0] 范围内，值越高，输出越随机和创造性；值越低，输出越稳定。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_p
    use_template: top_p
    label:
      en_US: "Top P"
      zh_Hans: "Top P"
    type: float
    default: 0.7
    min: 0.0
    max: 1.0
    precision: 1
    required: true
    help:
      en_US: "The value range of the sampling method is [0.0, 1.0]. The top_p value determines that the model selects tokens from the top p% of candidate words with the highest probability; when top_p is 0, this parameter is invalid. It is recommended to adjust either top_p or temperature parameters according to your needs to avoid adjusting both at the same time."
      zh_Hans: "采样方法的取值范围为 [0.0,1.0]。top_p 值确定模型从概率最高的前p%的候选词中选取 tokens；当 top_p 为 0 时，此参数无效。建议根据需求调整 top_p 或 temperature 参数，避免同时调整两者。"
  - name: top_k
    use_template: top_k
    label:
      en_US: "Top K"
      zh_Hans: "Top K"
    type: int
    default: 50
    min: 0
    max: 100
    required: true
    help:
      en_US: "The value range is [0,100], which limits the model to only select from the top k words with the highest probability when choosing the next word at each step. The larger the value, the more diverse text generation will be."
      zh_Hans: "取值范围为 [0,100]，限制模型在每一步选择下一个词时，只从概率最高的前 k 个词中选取。数值越大，文本生成越多样。"
  - name: frequency_penalty
    use_template: frequency_penalty
    label:
      en_US: "Frequency Penalty"
      zh_Hans: "频率惩罚"
    type: float
    default: 0
    min: -1.0
    max: 1.0
    precision: 1
    required: false
    help:
      en_US: "Used to adjust the frequency of repeated content in automatically generated text. Positive numbers reduce repetition, while negative numbers increase repetition. After setting this parameter, if a word has already appeared in the text, the model will decrease the probability of choosing that word for subsequent generation."
      zh_Hans: "用于调整自动生成文本中重复内容的频率。正数减少重复，负数增加重复。设置此参数后，如果一个词在文本中已经出现过，模型在后续生成中选择该词的概率会降低。"
  - name: user
    use_template: text
    label:
      en_US: "User"
      zh_Hans: "用户"
    type: string
    required: false
    help:
      en_US: "Used to track and differentiate conversation requests from different users."
      zh_Hans: "用于追踪和区分不同用户的对话请求。"
--- a/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/llm/llm.py
@ -0,0 +1,47 @@
 from collections.abc import Generator
 from typing import Optional, Union
 from core.model_runtime.entities.llm_entities import LLMMode, LLMResult
 from core.model_runtime.entities.message_entities import (
    PromptMessage,
    PromptMessageTool,
 )
 from core.model_runtime.model_providers.openai_api_compatible.llm.llm import OAIAPICompatLargeLanguageModel
 class GiteeAILargeLanguageModel(OAIAPICompatLargeLanguageModel):
    """
    Large language model for Gitee AI, built on the OpenAI-API-compatible base model.
    Before delegating to the base class, the credentials dict is updated in place with the
    model-specific serverless ``endpoint_url`` and the request ``mode``.
    """
    # Model names whose serverless URL path segment differs from the model name itself.
    MODEL_TO_IDENTITY: dict[str, str] = {
        "Yi-1.5-34B-Chat": "Yi-34B-Chat",
        "deepseek-coder-33B-instruct-completions": "deepseek-coder-33B-instruct",
        "deepseek-coder-33B-instruct-chat": "deepseek-coder-33B-instruct",
    }
    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        """
        Invoke large language model
        :param model: model name
        :param credentials: model credentials (mutated: ``endpoint_url`` and ``mode`` are set)
        :param prompt_messages: prompt messages
        :param model_parameters: model parameters
        :param tools: tools for tool calling
        :param stop: stop words
        :param stream: is stream response
        :param user: unique user id
        :return: full response or stream response chunk generator result
        """
        self._add_custom_parameters(credentials, model, model_parameters)
        # Forward ``user`` too; it is part of this method's signature and must not be dropped.
        return super()._invoke(
            model, credentials, prompt_messages, model_parameters, tools, stop, stream, user=user
        )
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials
        :param model: model name
        :param credentials: model credentials (mutated: ``endpoint_url`` and ``mode`` are set)
        :return:
        """
        self._add_custom_parameters(credentials, model, None)
        super().validate_credentials(model, credentials)
    @staticmethod
    def _add_custom_parameters(
        credentials: dict, model: Optional[str], model_parameters: Optional[dict]
    ) -> None:
        """
        Write the Gitee AI endpoint URL and request mode into ``credentials``.
        :param credentials: model credentials, updated in place
        :param model: model name; a fallback name is used when None
        :param model_parameters: model parameters (currently not read here)
        :return:
        """
        if model is None:
            # NOTE(review): the fallback is an embedding model name; looks copied from the
            # embedding provider - confirm whether an LLM name was intended.
            model = "bge-large-zh-v1.5"
        model_identity = GiteeAILargeLanguageModel.MODEL_TO_IDENTITY.get(model, model)
        credentials["endpoint_url"] = f"https://ai.gitee.com/api/serverless/{model_identity}/"
        # Model names ending in "completions" use completion mode; everything else is chat.
        if model.endswith("completions"):
            credentials["mode"] = LLMMode.COMPLETION.value
        else:
            credentials["mode"] = LLMMode.CHAT.value
--- a/api/core/model_runtime/model_providers/gitee_ai/rerank/init.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/rerank/init.py
--- a/api/core/model_runtime/model_providers/gitee_ai/rerank/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/rerank/_position.yaml
@ -0,0 +1 @@
 - bge-reranker-v2-m3
--- a/api/core/model_runtime/model_providers/gitee_ai/rerank/bge-reranker-v2-m3.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/rerank/bge-reranker-v2-m3.yaml
@ -0,0 +1,4 @@
 model: bge-reranker-v2-m3
 model_type: rerank
 model_properties:
  context_size: 1024
--- a/api/core/model_runtime/model_providers/gitee_ai/rerank/rerank.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/rerank/rerank.py
@ -0,0 +1,128 @@
 from typing import Optional
 import httpx
 from core.model_runtime.entities.common_entities import I18nObject
 from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
 from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
 from core.model_runtime.errors.invoke import (
    InvokeAuthorizationError,
    InvokeBadRequestError,
    InvokeConnectionError,
    InvokeError,
    InvokeRateLimitError,
    InvokeServerUnavailableError,
 )
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.rerank_model import RerankModel
 class GiteeAIRerankModel(RerankModel):
    """
    Rerank model that calls the Gitee AI serverless ``/{model}/rerank`` HTTP endpoint.
    """
    def _invoke(
        self,
        model: str,
        credentials: dict,
        query: str,
        docs: list[str],
        score_threshold: Optional[float] = None,
        top_n: Optional[int] = None,
        user: Optional[str] = None,
    ) -> RerankResult:
        """
        Invoke rerank model
        :param model: model name
        :param credentials: model credentials (reads ``api_key`` and optional ``base_url``)
        :param query: search query
        :param docs: docs for reranking
        :param score_threshold: score threshold; results scoring below it are dropped
        :param top_n: top n documents to return; only sent to the API when not None
        :param user: unique user id
        :return: rerank result
        :raises InvokeServerUnavailableError: when the API responds with an HTTP error status
        """
        # Nothing to rank: skip the HTTP round trip entirely.
        if not docs:
            return RerankResult(model=model, docs=[])
        base_url = credentials.get("base_url", "https://ai.gitee.com/api/serverless")
        base_url = base_url.removesuffix("/")
        try:
            body = {"model": model, "query": query, "documents": docs}
            if top_n is not None:
                body["top_n"] = top_n
            response = httpx.post(
                f"{base_url}/{model}/rerank",
                json=body,
                headers={"Authorization": f"Bearer {credentials.get('api_key')}"},
            )
            response.raise_for_status()
            results = response.json()
            rerank_documents = []
            for result in results["results"]:
                rerank_document = RerankDocument(
                    index=result["index"],
                    text=result["document"]["text"],
                    score=result["relevance_score"],
                )
                if score_threshold is None or result["relevance_score"] >= score_threshold:
                    rerank_documents.append(rerank_document)
            return RerankResult(model=model, docs=rerank_documents)
        except httpx.HTTPStatusError as e:
            # Keep the original HTTP error attached as the cause for easier debugging.
            raise InvokeServerUnavailableError(str(e)) from e
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials by running a small rerank request.
        :param model: model name
        :param credentials: model credentials
        :return:
        :raises CredentialsValidateFailedError: when the probe request fails for any reason
        """
        try:
            self._invoke(
                model=model,
                credentials=credentials,
                query="What is the capital of the United States?",
                docs=[
                    "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
                    "Census, Carson City had a population of 55,274.",
                    "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
                    "are a political division controlled by the United States. Its capital is Saipan.",
                ],
                score_threshold=0.01,
            )
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex
    @property
    def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
        """
        Map model invoke error to unified error
        :return: mapping from unified invoke error type to the httpx exception types it covers
        """
        return {
            InvokeConnectionError: [httpx.ConnectError],
            InvokeServerUnavailableError: [httpx.RemoteProtocolError],
            InvokeRateLimitError: [],
            InvokeAuthorizationError: [httpx.HTTPStatusError],
            InvokeBadRequestError: [httpx.RequestError],
        }
    def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
        """
        generate custom model entities from credentials
        :param model: model name, also used as the English label
        :param credentials: model credentials; ``context_size`` is converted with ``int()``
        :return: model entity describing a customizable rerank model
        """
        entity = AIModelEntity(
            model=model,
            label=I18nObject(en_US=model),
            model_type=ModelType.RERANK,
            fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
            model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size"))},
        )
        return entity
--- a/api/core/model_runtime/model_providers/gitee_ai/speech2text/init.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/speech2text/init.py
--- a/api/core/model_runtime/model_providers/gitee_ai/speech2text/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/speech2text/_position.yaml
@ -0,0 +1,2 @@
 - whisper-base
 - whisper-large
--- a/api/core/model_runtime/model_providers/gitee_ai/speech2text/speech2text.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/speech2text/speech2text.py
@ -0,0 +1,53 @@
 import os
 from typing import IO, Optional
 import requests
 from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
 from core.model_runtime.model_providers.gitee_ai._common import _CommonGiteeAI
 class GiteeAISpeech2TextModel(_CommonGiteeAI, Speech2TextModel):
    """
    Speech to text model that calls the Gitee AI serverless ``speech-to-text`` endpoint.
    """
    def _invoke(self, model: str, credentials: dict, file: IO[bytes], user: Optional[str] = None) -> str:
        """
        Invoke speech2text model
        :param model: model name
        :param credentials: model credentials (reads ``api_key``)
        :param file: audio file
        :param user: unique user id
        :return: text for given audio file
        :raises InvokeBadRequestError: when the API responds with a status other than 200
        """
        # doc: https://ai.gitee.com/docs/openapi/serverless#tag/serverless/POST/{service}/speech-to-text
        endpoint_url = f"https://ai.gitee.com/api/serverless/{model}/speech-to-text"
        files = [("file", file)]
        # In-memory streams may have no ``name``; fall back to an empty extension instead of crashing.
        _, file_ext = os.path.splitext(getattr(file, "name", "") or "")
        # splitext keeps the leading dot (".mp3"); strip it so the subtype reads "mp3", not ".mp3".
        audio_subtype = file_ext.lstrip(".")
        # NOTE(review): an explicit Content-Type replaces the multipart header (with boundary) that
        # requests would generate for ``files=`` - confirm the API really expects this header.
        headers = {"Content-Type": f"audio/{audio_subtype}", "Authorization": f"Bearer {credentials.get('api_key')}"}
        response = requests.post(endpoint_url, headers=headers, files=files)
        if response.status_code != 200:
            raise InvokeBadRequestError(response.text)
        response_data = response.json()
        return response_data["text"]
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials by transcribing the bundled demo audio file.
        :param model: model name
        :param credentials: model credentials
        :return:
        :raises CredentialsValidateFailedError: when the probe transcription fails for any reason
        """
        try:
            audio_file_path = self._get_demo_file_path()
            with open(audio_file_path, "rb") as audio_file:
                self._invoke(model, credentials, audio_file)
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex
--- a/api/core/model_runtime/model_providers/gitee_ai/speech2text/whisper-base.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/speech2text/whisper-base.yaml
@ -0,0 +1,5 @@
 model: whisper-base
 model_type: speech2text
 model_properties:
  file_upload_limit: 1
  supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
--- a/api/core/model_runtime/model_providers/gitee_ai/speech2text/whisper-large.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/speech2text/whisper-large.yaml
@ -0,0 +1,5 @@
 model: whisper-large
 model_type: speech2text
 model_properties:
  file_upload_limit: 1
  supported_file_extensions: flac,mp3,mp4,mpeg,mpga,m4a,ogg,wav,webm
--- a/api/core/model_runtime/model_providers/gitee_ai/text_embedding/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/text_embedding/_position.yaml
@ -0,0 +1,3 @@
 - bge-large-zh-v1.5
 - bge-small-zh-v1.5
 - bge-m3
--- a/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-large-zh-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-large-zh-v1.5.yaml
@ -0,0 +1,8 @@
 model: bge-large-zh-v1.5
 label:
  zh_Hans: bge-large-zh-v1.5
  en_US: bge-large-zh-v1.5
 model_type: text-embedding
 model_properties:
  context_size: 200000
  max_chunks: 20
--- a/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-m3.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-m3.yaml
@ -0,0 +1,8 @@
 model: bge-m3
 label:
  zh_Hans: bge-m3
  en_US: bge-m3
 model_type: text-embedding
 model_properties:
  context_size: 200000
  max_chunks: 20
--- a/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-small-zh-v1.5.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/text_embedding/bge-small-zh-v1.5.yaml
@ -0,0 +1,8 @@
 model: bge-small-zh-v1.5
 label:
  zh_Hans: bge-small-zh-v1.5
  en_US: bge-small-zh-v1.5
 model_type: text-embedding
 model_properties:
  context_size: 200000
  max_chunks: 20
--- a/api/core/model_runtime/model_providers/gitee_ai/text_embedding/text_embedding.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/text_embedding/text_embedding.py
@ -0,0 +1,31 @@
 from typing import Optional
 from core.entities.embedding_type import EmbeddingInputType
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
    OAICompatEmbeddingModel,
 )
 class GiteeAIEmbeddingModel(OAICompatEmbeddingModel):
    """
    Text embedding model for Gitee AI, built on the OpenAI-API-compatible embedding model.
    The credentials dict is updated in place with the model-specific ``endpoint_url``
    before delegating to the base class.
    """
    def _invoke(
        self,
        model: str,
        credentials: dict,
        texts: list[str],
        user: Optional[str] = None,
        input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT,
    ) -> TextEmbeddingResult:
        """
        Invoke text embedding model
        :param model: model name
        :param credentials: model credentials (mutated: ``endpoint_url`` is set)
        :param texts: texts to embed
        :param user: unique user id
        :param input_type: input type
        :return: embeddings result
        """
        self._add_custom_parameters(credentials, model)
        return super()._invoke(model, credentials, texts, user, input_type)
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        Validate model credentials
        :param model: model name
        :param credentials: model credentials (mutated: ``endpoint_url`` is set)
        :return:
        """
        # Validate against the endpoint of the model being configured (same URL ``_invoke`` uses),
        # not a fixed default model.
        self._add_custom_parameters(credentials, model)
        super().validate_credentials(model, credentials)
    @staticmethod
    def _add_custom_parameters(credentials: dict, model: Optional[str]) -> None:
        """
        Write the Gitee AI serverless endpoint URL for ``model`` into ``credentials``.
        :param credentials: model credentials, updated in place
        :param model: model name; "bge-m3" is used when None
        :return:
        """
        if model is None:
            model = "bge-m3"
        credentials["endpoint_url"] = f"https://ai.gitee.com/api/serverless/{model}/v1/"
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/ChatTTS.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/ChatTTS.yaml
@ -0,0 +1,11 @@
 model: ChatTTS
 model_type: tts
 model_properties:
  default_voice: 'default'
  voices:
    - mode: 'default'
      name: 'Default'
      language: [ 'zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID' ]
  word_limit: 3500
  audio_type: 'mp3'
  max_workers: 5
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/FunAudioLLM-CosyVoice-300M.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/FunAudioLLM-CosyVoice-300M.yaml
@ -0,0 +1,11 @@
 model: FunAudioLLM-CosyVoice-300M
 model_type: tts
 model_properties:
  default_voice: 'default'
  voices:
    - mode: 'default'
      name: 'Default'
      language: [ 'zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID' ]
  word_limit: 3500
  audio_type: 'mp3'
  max_workers: 5
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/init.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/init.py
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/_position.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/_position.yaml
@ -0,0 +1,4 @@
 - speecht5_tts
 - ChatTTS
 - fish-speech-1.2-sft
 - FunAudioLLM-CosyVoice-300M
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/fish-speech-1.2-sft.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/fish-speech-1.2-sft.yaml
@ -0,0 +1,11 @@
 model: fish-speech-1.2-sft
 model_type: tts
 model_properties:
  default_voice: 'default'
  voices:
    - mode: 'default'
      name: 'Default'
      language: [ 'zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID' ]
  word_limit: 3500
  audio_type: 'mp3'
  max_workers: 5
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/speecht5_tts.yaml
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/speecht5_tts.yaml
@ -0,0 +1,11 @@
 model: speecht5_tts
 model_type: tts
 model_properties:
  default_voice: 'default'
  voices:
    - mode: 'default'
      name: 'Default'
      language: [ 'zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID' ]
  word_limit: 3500
  audio_type: 'mp3'
  max_workers: 5
--- a/api/core/model_runtime/model_providers/gitee_ai/tts/tts.py
+++ b/api/core/model_runtime/model_providers/gitee_ai/tts/tts.py
@ -0,0 +1,79 @@
 from typing import Optional
 import requests
 from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.model_runtime.model_providers.gitee_ai._common import _CommonGiteeAI
 class GiteeAIText2SpeechModel(_CommonGiteeAI, TTSModel):
    """
    Text to speech model that calls the Gitee AI serverless ``text-to-speech`` endpoint.
    """
    def _invoke(
        self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, user: Optional[str] = None
    ) -> any:
        """
        _invoke text2speech model
        :param model: model name
        :param tenant_id: user tenant id
        :param credentials: model credentials
        :param content_text: text content to be translated
        :param voice: model timbre
        :param user: unique user id
        :return: generator yielding the audio in chunks
        """
        return self._tts_invoke_streaming(model=model, credentials=credentials, content_text=content_text, voice=voice)
    def validate_credentials(self, model: str, credentials: dict) -> None:
        """
        validate credentials text2speech model
        :param model: model name
        :param credentials: model credentials
        :return:
        :raises CredentialsValidateFailedError: when the probe request fails for any reason
        """
        try:
            audio_chunks = self._tts_invoke_streaming(
                model=model,
                credentials=credentials,
                content_text="Hello Dify!",
                voice=self._get_model_default_voice(model, credentials),
            )
            # _tts_invoke_streaming is a generator: no request is sent until it is iterated.
            # Pull the first chunk so the API call actually happens and bad credentials surface.
            next(audio_chunks, None)
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex)) from ex
    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str, voice: str) -> any:
        """
        _tts_invoke_streaming text2speech model
        :param model: model name
        :param credentials: model credentials (reads ``api_key``)
        :param content_text: text content to be translated
        :param voice: model timbre (not sent to the API by this method)
        :return: generator yielding the response audio in chunks of at most 1024 bytes
        :raises InvokeBadRequestError: on a non-200 response or any other failure
        """
        try:
            # doc: https://ai.gitee.com/docs/openapi/serverless#tag/serverless/POST/{service}/text-to-speech
            endpoint_url = f"https://ai.gitee.com/api/serverless/{model}/text-to-speech"
            headers = {"Content-Type": "application/json"}
            api_key = credentials.get("api_key")
            # The Authorization header is only attached when an API key is configured.
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"
            payload = {"inputs": content_text}
            response = requests.post(endpoint_url, headers=headers, json=payload)
            if response.status_code != 200:
                raise InvokeBadRequestError(response.text)
            # The full body is downloaded first, then re-emitted in fixed-size slices.
            data = response.content
            for i in range(0, len(data), 1024):
                yield data[i : i + 1024]
        except Exception as ex:
            raise InvokeBadRequestError(str(ex)) from ex
--- a/api/pytest.ini
+++ b/api/pytest.ini
@ -27,3 +27,4 @@ env =
    XINFERENCE_GENERATION_MODEL_UID = generate
    XINFERENCE_RERANK_MODEL_UID = rerank
    XINFERENCE_SERVER_URL = http://a.abc.com:11451
    GITEE_AI_API_KEY = aaaaaaaaaaaaaaaaaaaa
--- a/api/tests/integration_tests/.env.example
+++ b/api/tests/integration_tests/.env.example
@ -83,3 +83,6 @@ VOLC_EMBEDDING_ENDPOINT_ID=
 # 360 AI Credentials
 ZHINAO_API_KEY=
 # Gitee AI Credentials
 GITEE_AI_API_KEY=
--- a/api/tests/integration_tests/model_runtime/gitee_ai/init.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/init.py
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_llm.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_llm.py
@ -0,0 +1,132 @@
 import os
 from collections.abc import Generator
 import pytest
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
 from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessageTool,
    SystemPromptMessage,
    UserPromptMessage,
 )
 from core.model_runtime.entities.model_entities import AIModelEntity
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.gitee_ai.llm.llm import GiteeAILargeLanguageModel
 def test_predefined_models():
    """The provider ships at least one predefined LLM schema of the expected entity type."""
    schemas = GiteeAILargeLanguageModel().predefined_models()
    assert len(schemas) >= 1
    first_schema = schemas[0]
    assert isinstance(first_schema, AIModelEntity)
 def test_validate_credentials_for_chat_model():
    """An invalid API key must be rejected; the key from GITEE_AI_API_KEY must be accepted."""
    llm = GiteeAILargeLanguageModel()
    with pytest.raises(CredentialsValidateFailedError):
        # model name to gpt-3.5-turbo because of mocking
        invalid_credentials = {"api_key": "invalid_key"}
        llm.validate_credentials(model="gpt-3.5-turbo", credentials=invalid_credentials)
    valid_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    llm.validate_credentials(model="Qwen2-7B-Instruct", credentials=valid_credentials)
 def test_invoke_chat_model():
    """A blocking (non-stream) chat invocation returns an LLMResult with non-empty content."""
    llm = GiteeAILargeLanguageModel()
    messages = [
        SystemPromptMessage(
            content="You are a helpful AI assistant.",
        ),
        UserPromptMessage(content="Hello World!"),
    ]
    parameters = {
        "temperature": 0.0,
        "top_p": 1.0,
        "presence_penalty": 0.0,
        "frequency_penalty": 0.0,
        "max_tokens": 10,
        "stream": False,
    }
    response = llm.invoke(
        model="Qwen2-7B-Instruct",
        credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
        prompt_messages=messages,
        model_parameters=parameters,
        stop=["How"],
        stream=False,
        user="foo",
    )
    assert isinstance(response, LLMResult)
    assert len(response.message.content) > 0
 def test_invoke_stream_chat_model():
    """A streaming chat invocation yields well-formed chunks; the finishing chunk carries usage."""
    llm = GiteeAILargeLanguageModel()
    messages = [
        SystemPromptMessage(
            content="You are a helpful AI assistant.",
        ),
        UserPromptMessage(content="Hello World!"),
    ]
    stream_result = llm.invoke(
        model="Qwen2-7B-Instruct",
        credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
        prompt_messages=messages,
        model_parameters={"temperature": 0.0, "max_tokens": 100, "stream": False},
        stream=True,
        user="foo",
    )
    assert isinstance(stream_result, Generator)
    for piece in stream_result:
        assert isinstance(piece, LLMResultChunk)
        delta = piece.delta
        assert isinstance(delta, LLMResultChunkDelta)
        assert isinstance(delta.message, AssistantPromptMessage)
        if delta.finish_reason is None:
            # Intermediate chunks must carry some text.
            assert len(delta.message.content) > 0
        else:
            # The finishing chunk must report token usage.
            assert delta.usage is not None
 def test_get_num_tokens():
    """Token counting returns the expected totals, without and with a tool definition."""
    llm = GiteeAILargeLanguageModel()
    plain_count = llm.get_num_tokens(
        model="Qwen2-7B-Instruct",
        credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
        prompt_messages=[UserPromptMessage(content="Hello World!")],
    )
    assert plain_count == 10
    weather_tool = PromptMessageTool(
        name="get_weather",
        description="Determine weather in my location",
        parameters={
            "type": "object",
            "properties": {
                "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"},
                "unit": {"type": "string", "enum": ["c", "f"]},
            },
            "required": ["location"],
        },
    )
    conversation = [
        SystemPromptMessage(
            content="You are a helpful AI assistant.",
        ),
        UserPromptMessage(content="Hello World!"),
    ]
    count_with_tools = llm.get_num_tokens(
        model="Qwen2-7B-Instruct",
        credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
        prompt_messages=conversation,
        tools=[weather_tool],
    )
    assert count_with_tools == 77
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_provider.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_provider.py
@ -0,0 +1,15 @@
 import os
 import pytest
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.gitee_ai.gitee_ai import GiteeAIProvider
 def test_validate_provider_credentials():
    """Provider-level validation rejects an invalid key and accepts the configured one."""
    gitee_provider = GiteeAIProvider()
    with pytest.raises(CredentialsValidateFailedError):
        invalid_credentials = {"api_key": "invalid_key"}
        gitee_provider.validate_provider_credentials(credentials=invalid_credentials)
    valid_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    gitee_provider.validate_provider_credentials(credentials=valid_credentials)
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_rerank.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_rerank.py
@ -0,0 +1,47 @@
 import os
 import pytest
 from core.model_runtime.entities.rerank_entities import RerankResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.gitee_ai.rerank.rerank import GiteeAIRerankModel
 def test_validate_credentials():
    """Rerank credential validation rejects an invalid key and accepts the configured one."""
    reranker = GiteeAIRerankModel()
    with pytest.raises(CredentialsValidateFailedError):
        invalid_credentials = {"api_key": "invalid_key"}
        reranker.validate_credentials(model="bge-reranker-v2-m3", credentials=invalid_credentials)
    valid_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    reranker.validate_credentials(model="bge-reranker-v2-m3", credentials=valid_credentials)
 def test_invoke_model():
    """Reranking two documents with top_n=1 returns a single document above the threshold."""
    reranker = GiteeAIRerankModel()
    candidate_docs = [
        "Carson City is the capital city of the American state of Nevada. At the 2010 United States "
        "Census, Carson City had a population of 55,274.",
        "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
        "are a political division controlled by the United States. Its capital is Saipan.",
    ]
    outcome = reranker.invoke(
        model="bge-reranker-v2-m3",
        credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
        query="What is the capital of the United States?",
        docs=candidate_docs,
        top_n=1,
        score_threshold=0.01,
    )
    assert isinstance(outcome, RerankResult)
    assert len(outcome.docs) == 1
    assert outcome.docs[0].score >= 0.01
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_speech2text.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_speech2text.py
@ -0,0 +1,45 @@
 import os
 import pytest
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.gitee_ai.speech2text.speech2text import GiteeAISpeech2TextModel
 def test_validate_credentials():
    """Speech-to-text credential validation rejects an invalid key and accepts the configured one."""
    transcriber = GiteeAISpeech2TextModel()
    with pytest.raises(CredentialsValidateFailedError):
        invalid_credentials = {"api_key": "invalid_key"}
        transcriber.validate_credentials(model="whisper-base", credentials=invalid_credentials)
    valid_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    transcriber.validate_credentials(model="whisper-base", credentials=valid_credentials)
 def test_invoke_model():
    """Transcribing the shared audio.mp3 asset yields the expected counting text."""
    transcriber = GiteeAISpeech2TextModel()
    # The audio asset lives in the "assets" directory next to this test package.
    package_dir = os.path.dirname(os.path.abspath(__file__))
    audio_file_path = os.path.join(os.path.dirname(package_dir), "assets", "audio.mp3")
    with open(audio_file_path, "rb") as audio_file:
        transcript = transcriber.invoke(
            model="whisper-base",
            credentials={"api_key": os.environ.get("GITEE_AI_API_KEY")},
            file=audio_file,
        )
        assert isinstance(transcript, str)
        assert transcript == "1 2 3 4 5 6 7 8 9 10"
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_text_embedding.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_text_embedding.py
@ -0,0 +1,46 @@
 import os
 import pytest
 from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.gitee_ai.text_embedding.text_embedding import GiteeAIEmbeddingModel
 def test_validate_credentials():
    """Embedding credential validation rejects an invalid key and accepts the configured one."""
    embedder = GiteeAIEmbeddingModel()
    with pytest.raises(CredentialsValidateFailedError):
        invalid_credentials = {"api_key": "invalid_key"}
        embedder.validate_credentials(model="bge-large-zh-v1.5", credentials=invalid_credentials)
    valid_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    embedder.validate_credentials(model="bge-large-zh-v1.5", credentials=valid_credentials)
 def test_invoke_model():
    """Embedding two texts returns a TextEmbeddingResult with one vector per text."""
    embedder = GiteeAIEmbeddingModel()
    api_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    outcome = embedder.invoke(
        model="bge-large-zh-v1.5",
        credentials=api_credentials,
        texts=["hello", "world"],
        user="user",
    )
    assert isinstance(outcome, TextEmbeddingResult)
    assert len(outcome.embeddings) == 2
 def test_get_num_tokens():
    """Token counting for two single-word texts reports two tokens."""
    embedder = GiteeAIEmbeddingModel()
    api_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    token_total = embedder.get_num_tokens(
        model="bge-large-zh-v1.5",
        credentials=api_credentials,
        texts=["hello", "world"],
    )
    assert token_total == 2
--- a/api/tests/integration_tests/model_runtime/gitee_ai/test_tts.py
+++ b/api/tests/integration_tests/model_runtime/gitee_ai/test_tts.py
@ -0,0 +1,23 @@
 import os
 from core.model_runtime.model_providers.gitee_ai.tts.tts import GiteeAIText2SpeechModel
 def test_invoke_model():
    """Synthesizing a short sentence streams back a non-empty audio payload."""
    synthesizer = GiteeAIText2SpeechModel()
    api_credentials = {"api_key": os.environ.get("GITEE_AI_API_KEY")}
    audio_stream = synthesizer.invoke(
        model="speecht5_tts",
        tenant_id="test",
        credentials=api_credentials,
        content_text="Hello, world!",
        voice="",
    )
    # Drain the stream and concatenate every chunk into one bytes object.
    audio = b"".join(audio_stream)
    assert audio != b""