feat:nvidia add nemotron4-340b and microsoft/phi-3 (#6973)

2024-11-16 11:42:29 +08:00 · 2024-08-06 02:16:41 +00:00 · 2024-08-06 02:16:41 +00:00 · 23ed15d19f
commit 23ed15d19f
parent 312d905c9b
5 changed files with 115 additions and 2 deletions
--- a/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/_position.yaml
@ -10,5 +10,8 @@
 - mistralai/mistral-large
 - mistralai/mixtral-8x7b-instruct-v0.1
 - mistralai/mixtral-8x22b-instruct-v0.1
+- nvidia/nemotron-4-340b-instruct
+- microsoft/phi-3-medium-128k-instruct
+- microsoft/phi-3-mini-128k-instruct
 - fuyu-8b
 - snowflake/arctic
--- a/api/core/model_runtime/model_providers/nvidia/llm/llm.py
+++ b/api/core/model_runtime/model_providers/nvidia/llm/llm.py
@ -34,8 +34,10 @@ class NVIDIALargeLanguageModel(OAIAPICompatLargeLanguageModel):
        'meta/llama-3.1-8b-instruct': '',
        'meta/llama-3.1-70b-instruct': '',
        'meta/llama-3.1-405b-instruct': '',
-        'google/recurrentgemma-2b': ''
-        
+        'google/recurrentgemma-2b': '',
+        'nvidia/nemotron-4-340b-instruct': '',
+        'microsoft/phi-3-medium-128k-instruct':'',
+        'microsoft/phi-3-mini-128k-instruct':''
    }

    def _invoke(self, model: str, credentials: dict,
--- a/api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/nemotron-4-340b-instruct.yaml
@ -0,0 +1,36 @@
+model: nvidia/nemotron-4-340b-instruct
+label:
+  zh_Hans: nvidia/nemotron-4-340b-instruct
+  en_US: nvidia/nemotron-4-340b-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 4096  # Nemotron-4-340B-Instruct context window is 4,096 tokens per NVIDIA's model card
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
--- a/api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-medium-128k-instruct.yaml
@ -0,0 +1,36 @@
+model: microsoft/phi-3-medium-128k-instruct
+label:
+  zh_Hans: microsoft/phi-3-medium-128k-instruct
+  en_US: microsoft/phi-3-medium-128k-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0
--- a/api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
+++ b/api/core/model_runtime/model_providers/nvidia/llm/phi-3-mini-128k-instruct.yaml
@ -0,0 +1,36 @@
+model: microsoft/phi-3-mini-128k-instruct
+label:
+  zh_Hans: microsoft/phi-3-mini-128k-instruct
+  en_US: microsoft/phi-3-mini-128k-instruct
+model_type: llm
+features:
+  - agent-thought
+model_properties:
+  mode: chat
+  context_size: 131072
+parameter_rules:
+  - name: temperature
+    use_template: temperature
+    min: 0
+    max: 1
+    default: 0.5
+  - name: top_p
+    use_template: top_p
+    min: 0
+    max: 1
+    default: 1
+  - name: max_tokens
+    use_template: max_tokens
+    min: 1
+    max: 4096
+    default: 1024
+  - name: frequency_penalty
+    use_template: frequency_penalty
+    min: -2
+    max: 2
+    default: 0
+  - name: presence_penalty
+    use_template: presence_penalty
+    min: -2
+    max: 2
+    default: 0