feat: nvidia add llama3.1 model (#6844)

小羽 2024-07-31 21:24:02 +08:00 committed by GitHub
parent 4b410494b3
commit 56b43f62d1
5 changed files with 114 additions and 0 deletions


@@ -2,6 +2,9 @@
 - google/codegemma-7b
 - google/recurrentgemma-2b
 - meta/llama2-70b
+- meta/llama-3.1-8b-instruct
+- meta/llama-3.1-70b-instruct
+- meta/llama-3.1-405b-instruct
 - meta/llama3-8b-instruct
 - meta/llama3-70b-instruct
 - mistralai/mistral-large


@@ -0,0 +1,36 @@
model: meta/llama-3.1-405b-instruct
label:
  zh_Hans: meta/llama-3.1-405b-instruct
  en_US: meta/llama-3.1-405b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0
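The three new model definitions are identical apart from the model name: chat mode, a 131,072-token context window, and the standard sampling parameters. As a rough illustration of how parameter_rules like these are typically consumed, the sketch below clamps caller-supplied values to the declared min/max and falls back to default when a value is omitted. It is a simplified stand-in rather than Dify's actual rule-resolution code; only the rule values are taken from the YAML above.

# Illustrative only: resolve call parameters against the YAML parameter_rules.
# The rule values mirror meta/llama-3.1-405b-instruct; the resolver itself is a
# hypothetical helper, not Dify's implementation.
RULES = [
    {"name": "temperature", "min": 0, "max": 1, "default": 0.5},
    {"name": "top_p", "min": 0, "max": 1, "default": 1},
    {"name": "max_tokens", "min": 1, "max": 4096, "default": 1024},
    {"name": "frequency_penalty", "min": -2, "max": 2, "default": 0},
    {"name": "presence_penalty", "min": -2, "max": 2, "default": 0},
]

def resolve_parameters(user_params: dict) -> dict:
    """Clamp supplied values into [min, max] and apply defaults for missing ones."""
    resolved = {}
    for rule in RULES:
        value = user_params.get(rule["name"], rule["default"])
        resolved[rule["name"]] = min(max(value, rule["min"]), rule["max"])
    return resolved

print(resolve_parameters({"temperature": 1.7}))
# {'temperature': 1, 'top_p': 1, 'max_tokens': 1024, 'frequency_penalty': 0, 'presence_penalty': 0}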


@@ -0,0 +1,36 @@
model: meta/llama-3.1-70b-instruct
label:
  zh_Hans: meta/llama-3.1-70b-instruct
  en_US: meta/llama-3.1-70b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0


@@ -0,0 +1,36 @@
model: meta/llama-3.1-8b-instruct
label:
  zh_Hans: meta/llama-3.1-8b-instruct
  en_US: meta/llama-3.1-8b-instruct
model_type: llm
features:
  - agent-thought
model_properties:
  mode: chat
  context_size: 131072
parameter_rules:
  - name: temperature
    use_template: temperature
    min: 0
    max: 1
    default: 0.5
  - name: top_p
    use_template: top_p
    min: 0
    max: 1
    default: 1
  - name: max_tokens
    use_template: max_tokens
    min: 1
    max: 4096
    default: 1024
  - name: frequency_penalty
    use_template: frequency_penalty
    min: -2
    max: 2
    default: 0
  - name: presence_penalty
    use_template: presence_penalty
    min: -2
    max: 2
    default: 0


@@ -31,6 +31,9 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
         'meta/llama2-70b': '',
         'meta/llama3-8b-instruct': '',
         'meta/llama3-70b-instruct': '',
+        'meta/llama-3.1-8b-instruct': '',
+        'meta/llama-3.1-70b-instruct': '',
+        'meta/llama-3.1-405b-instruct': '',
         'google/recurrentgemma-2b': ''
     }
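The provider maps each of the new model names to an empty endpoint suffix, so requests go through the shared OpenAI-API-compatible path. As a standalone smoke test that one of the added names is actually served, something like the following can be run against NVIDIA's OpenAI-compatible API; the base URL and environment variable name are assumptions, and the snippet is independent of Dify itself.

# Standalone smoke test (not part of this PR): call one of the newly added
# models through NVIDIA's OpenAI-compatible API. Base URL and env var name
# are assumptions; adjust to your account's endpoint and key.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",  # assumed NVIDIA endpoint
    api_key=os.environ["NVIDIA_API_KEY"],            # assumed env var name
)

response = client.chat.completions.create(
    model="meta/llama-3.1-8b-instruct",  # one of the names added above
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    temperature=0.5,  # defaults declared in the YAML files
    top_p=1,
    max_tokens=1024,
)
print(response.choices[0].message.content)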