feat: update prompt generate (#6516)

Joe 2024-07-23 19:52:14 +08:00 committed by GitHub
parent 0f6a064c08
commit 8123a00e97
5 changed files with 219 additions and 95 deletions

View File

@@ -22,17 +22,19 @@ class RuleGenerateApi(Resource):
     @account_initialization_required
     def post(self):
         parser = reqparse.RequestParser()
-        parser.add_argument('audiences', type=str, required=True, nullable=False, location='json')
-        parser.add_argument('hoping_to_solve', type=str, required=True, nullable=False, location='json')
+        parser.add_argument('instruction', type=str, required=True, nullable=False, location='json')
+        parser.add_argument('model_config', type=dict, required=True, nullable=False, location='json')
+        parser.add_argument('no_variable', type=bool, required=True, default=False, location='json')
         args = parser.parse_args()

         account = current_user

         try:
             rules = LLMGenerator.generate_rule_config(
-                account.current_tenant_id,
-                args['audiences'],
-                args['hoping_to_solve']
+                tenant_id=account.current_tenant_id,
+                instruction=args['instruction'],
+                model_config=args['model_config'],
+                no_variable=args['no_variable']
             )
         except ProviderTokenNotInitError as ex:
             raise ProviderNotInitializeError(ex.description)
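
A minimal sketch of the JSON body the updated endpoint now expects. The route path and auth header are assumptions (the diff does not show the URL rule); the three keys and the `model_config` sub-keys come from the code above.

```python
import requests

# Hypothetical console URL and token; the diff does not show the route or auth scheme.
resp = requests.post(
    "https://dify.example.com/console/api/rule-generate",
    headers={"Authorization": "Bearer <console-token>"},
    json={
        "instruction": "Summarize meeting notes and list action items",  # replaces 'audiences' / 'hoping_to_solve'
        "model_config": {"provider": "openai", "name": "gpt-4o"},        # keys read by generate_rule_config
        "no_variable": False,                                            # True skips variable extraction
    },
)
print(resp.json())  # expected keys: prompt, variables, opening_statement, error
```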

View File

@@ -3,10 +3,13 @@ import logging
 import re
 from typing import Optional

 from core.llm_generator.output_parser.errors import OutputParserException
 from core.llm_generator.output_parser.rule_config_generator import RuleConfigGeneratorOutputParser
 from core.llm_generator.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
-from core.llm_generator.prompts import CONVERSATION_TITLE_PROMPT, GENERATOR_QA_PROMPT
+from core.llm_generator.prompts import (
+    CONVERSATION_TITLE_PROMPT,
+    GENERATOR_QA_PROMPT,
+    WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
+)
 from core.model_manager import ModelManager
 from core.model_runtime.entities.message_entities import SystemPromptMessage, UserPromptMessage
 from core.model_runtime.entities.model_entities import ModelType
@@ -115,55 +118,158 @@ class LLMGenerator:
         return questions

     @classmethod
-    def generate_rule_config(cls, tenant_id: str, audiences: str, hoping_to_solve: str) -> dict:
+    def generate_rule_config(cls, tenant_id: str, instruction: str, model_config: dict, no_variable: bool) -> dict:
         output_parser = RuleConfigGeneratorOutputParser()

+        error = ""
+        error_step = ""
+        rule_config = {
+            "prompt": "",
+            "variables": [],
+            "opening_statement": "",
+            "error": ""
+        }
+        model_parameters = {
+            "max_tokens": 512,
+            "temperature": 0.01
+        }
+
+        if no_variable:
+            prompt_template = PromptTemplateParser(
+                WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE
+            )
+
+            prompt_generate = prompt_template.format(
+                inputs={
+                    "TASK_DESCRIPTION": instruction,
+                },
+                remove_template_variables=False
+            )
+
+            prompt_messages = [UserPromptMessage(content=prompt_generate)]
+
+            model_manager = ModelManager()
+
+            model_instance = model_manager.get_default_model_instance(
+                tenant_id=tenant_id,
+                model_type=ModelType.LLM,
+            )
+
+            try:
+                response = model_instance.invoke_llm(
+                    prompt_messages=prompt_messages,
+                    model_parameters=model_parameters,
+                    stream=False
+                )
+
+                rule_config["prompt"] = response.message.content
+
+            except InvokeError as e:
+                error = str(e)
+                error_step = "generate rule config"
+            except Exception as e:
+                logging.exception(e)
+                rule_config["error"] = str(e)
+
+            rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
+
+            return rule_config
+
+        # get rule config prompt, parameter and statement
+        prompt_generate, parameter_generate, statement_generate = output_parser.get_format_instructions()
+
         prompt_template = PromptTemplateParser(
-            template=output_parser.get_format_instructions()
+            prompt_generate
         )

-        prompt = prompt_template.format(
+        parameter_template = PromptTemplateParser(
+            parameter_generate
+        )
+
+        statement_template = PromptTemplateParser(
+            statement_generate
+        )
+
+        # format the prompt_generate_prompt
+        prompt_generate_prompt = prompt_template.format(
             inputs={
-                "audiences": audiences,
-                "hoping_to_solve": hoping_to_solve,
-                "variable": "{{variable}}",
-                "lanA": "{{lanA}}",
-                "lanB": "{{lanB}}",
-                "topic": "{{topic}}"
+                "TASK_DESCRIPTION": instruction,
             },
             remove_template_variables=False
         )
+        prompt_messages = [UserPromptMessage(content=prompt_generate_prompt)]

+        # get model instance
         model_manager = ModelManager()
-        model_instance = model_manager.get_default_model_instance(
+        model_instance = model_manager.get_model_instance(
             tenant_id=tenant_id,
             model_type=ModelType.LLM,
+            provider=model_config.get("provider") if model_config else None,
+            model=model_config.get("name") if model_config else None,
         )

-        prompt_messages = [UserPromptMessage(content=prompt)]
-
-        try:
-            response = model_instance.invoke_llm(
-                prompt_messages=prompt_messages,
-                model_parameters={
-                    "max_tokens": 512,
-                    "temperature": 0
-                },
-                stream=False
-            )
+        try:
+            # the first step to generate the task prompt
+            prompt_content = model_instance.invoke_llm(
+                prompt_messages=prompt_messages,
+                model_parameters=model_parameters,
+                stream=False
+            )
+        except InvokeError as e:
+            error = str(e)
+            error_step = "generate prefix prompt"
+            rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""
+
+            return rule_config
+
+        rule_config["prompt"] = prompt_content.message.content
+
+        parameter_generate_prompt = parameter_template.format(
+            inputs={
+                "INPUT_TEXT": prompt_content.message.content,
+            },
+            remove_template_variables=False
+        )
+        parameter_messages = [UserPromptMessage(content=parameter_generate_prompt)]
+
+        # the second step to generate the task_parameter and task_statement
+        statement_generate_prompt = statement_template.format(
+            inputs={
+                "TASK_DESCRIPTION": instruction,
+                "INPUT_TEXT": prompt_content.message.content,
+            },
+            remove_template_variables=False
+        )
+        statement_messages = [UserPromptMessage(content=statement_generate_prompt)]
+
+        try:
+            parameter_content = model_instance.invoke_llm(
+                prompt_messages=parameter_messages,
+                model_parameters=model_parameters,
+                stream=False
+            )
+            rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', parameter_content.message.content)
+        except InvokeError as e:
+            error = str(e)
+            error_step = "generate variables"
+
+        try:
+            statement_content = model_instance.invoke_llm(
+                prompt_messages=statement_messages,
+                model_parameters=model_parameters,
+                stream=False
+            )
+            rule_config["opening_statement"] = statement_content.message.content
+        except InvokeError as e:
+            error = str(e)
+            error_step = "generate conversation opener"

-            rule_config = output_parser.parse(response.message.content)
-        except InvokeError as e:
-            raise e
-        except OutputParserException:
-            raise ValueError('Please give a valid input for intended audience or hoping to solve problems.')
-        except Exception as e:
-            logging.exception(e)
-            rule_config = {
-                "prompt": "",
-                "variables": [],
-                "opening_statement": ""
-            }
-            rule_config["error"] = str(e)
+        rule_config["error"] = f"Failed to {error_step}. Error: {error}" if error else ""

         return rule_config
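
For illustration, a self-contained sketch of the variable-extraction step above, reusing the regex from the diff on a made-up model reply:

```python
import re

# Hypothetical reply from the parameter-extraction prompt
# (matches the list shape RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE asks for).
parameter_reply = '["Input_language", "Target_language"]'

# Same pattern as in generate_rule_config: capture every double-quoted token.
variables = re.findall(r'"\s*([^"]+)\s*"', parameter_reply)
print(variables)  # ['Input_language', 'Target_language']
```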

View File

@@ -1,14 +1,18 @@
 from typing import Any

 from core.llm_generator.output_parser.errors import OutputParserException
-from core.llm_generator.prompts import RULE_CONFIG_GENERATE_TEMPLATE
+from core.llm_generator.prompts import (
+    RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE,
+    RULE_CONFIG_PROMPT_GENERATE_TEMPLATE,
+    RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE,
+)
 from libs.json_in_md_parser import parse_and_check_json_markdown


 class RuleConfigGeneratorOutputParser:

-    def get_format_instructions(self) -> str:
-        return RULE_CONFIG_GENERATE_TEMPLATE
+    def get_format_instructions(self) -> tuple[str, str, str]:
+        return RULE_CONFIG_PROMPT_GENERATE_TEMPLATE, RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE, RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE

     def parse(self, text: str) -> Any:
         try:
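
The tuple return mirrors the unpacking at the call site in llm_generator.py above; a one-line sketch of the new contract:

```python
# generate_rule_config unpacks the three templates in exactly this order.
prompt_tpl, parameter_tpl, statement_tpl = RuleConfigGeneratorOutputParser().get_format_instructions()
```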

View File

@@ -81,65 +81,73 @@ GENERATOR_QA_PROMPT = (
     '<QA Pairs>'
 )

-RULE_CONFIG_GENERATE_TEMPLATE = """Given MY INTENDED AUDIENCES and HOPING TO SOLVE using a language model, please select \
-the model prompt that best suits the input.
-You will be provided with the prompt, variables, and an opening statement.
-Only the content enclosed in double curly braces, such as {{variable}}, in the prompt can be considered as a variable; \
-otherwise, it cannot exist as a variable in the variables.
-If you believe revising the original input will result in a better response from the language model, you may \
-suggest revisions.
+WORKFLOW_RULE_CONFIG_PROMPT_GENERATE_TEMPLATE = """
+Here is a task description for which I would like you to create a high-quality prompt template for:
+<task_description>
+{{TASK_DESCRIPTION}}
+</task_description>
+Based on the task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
+- Do not include an <input> or <output> section or variables in the prompt; assume the user will add them at their own will.
+- Clear instructions for the AI that will be using this prompt, demarcated with <instructions> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also specify in the instructions that the output should not contain any XML tag.
+- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not include variables in the prompt. Give three pairs of input and output examples.
+- Include other relevant sections demarcated with appropriate XML tags like <examples>, <instructions>.
+- Use the same language as the task description.
+- Output in ``` xml ``` and start with <instruction>
+Please generate the full prompt template with at least 300 words and output only the prompt template.
+"""

-<<PRINCIPLES OF GOOD PROMPT>>
-Integrate the intended audience in the prompt e.g. the audience is an expert in the field.
-Break down complex tasks into a sequence of simpler prompts in an interactive conversation.
-Implement example-driven prompting (Use few-shot prompting).
-When formatting your prompt start with Instruction followed by either Example if relevant. \
-Subsequently present your content. Use one or more line breaks to separate instructions examples questions context and input data.
-Incorporate the following phrases: Your task is and You MUST.
-Incorporate the following phrases: You will be penalized.
-Use leading words like writing think step by step.
-Add to your prompt the following phrase Ensure that your answer is unbiased and does not rely on stereotypes.
-Assign a role to the large language models.
-Use Delimiters.
-To write an essay /text /paragraph /article or any type of text that should be detailed: Write a detailed [essay/text/paragraph] for me on [topic] in detail by adding all the information necessary.
-Clearly state the requirements that the model must follow in order to produce content in the form of the keywords regulations hint or instructions
+RULE_CONFIG_PROMPT_GENERATE_TEMPLATE = """
+Here is a task description for which I would like you to create a high-quality prompt template for:
+<task_description>
+{{TASK_DESCRIPTION}}
+</task_description>
+Based on the task description, please create a well-structured prompt template that another AI could use to consistently complete the task. The prompt template should include:
+- Descriptive variable names surrounded by {{ }} (two curly brackets) to indicate where the actual values will be substituted in. Choose variable names that clearly indicate the type of value expected. Variable names must be composed only of numbers, English letters and underscores.
+- Clear instructions for the AI that will be using this prompt, demarcated with <instructions> tags. The instructions should provide step-by-step directions on how to complete the task using the input variables. Also specify in the instructions that the output should not contain any XML tag.
+- Relevant examples if needed to clarify the task further, demarcated with <example> tags. Do not use curly brackets anywhere other than in the <instruction> section.
+- Any other relevant sections demarcated with appropriate XML tags like <input>, <output>, etc.
+- Use the same language as the task description.
+- Output in ``` xml ``` and start with <instruction>
+Please generate the full prompt template and output only the prompt template.
+"""

-<< FORMATTING >>
-Return a markdown code snippet with a JSON object formatted to look like, \
-no any other string out of markdown code snippet:
-```json
-{{{{
-    "prompt": string \\ generated prompt
-    "variables": list of string \\ variables
-    "opening_statement": string \\ an opening statement to guide users on how to ask questions with generated prompt \
-and fill in variables, with a welcome sentence, and keep TLDR.
-}}}}
-```
+RULE_CONFIG_PARAMETER_GENERATE_TEMPLATE = """
+I need to extract the following information from the input text. The <information to be extracted> tag specifies the 'type', 'description' and 'required' of the information to be extracted.
+<information to be extracted>
+variable names bounded by two double curly brackets. Variable names must be composed only of numbers, English letters and underscores.
+</information to be extracted>

-<< EXAMPLES >>
-[EXAMPLE A]
-```json
-{
-  "prompt": "I need your help to translate the following {{Input_language}}paper paragraph into {{Target_language}}, in a style similar to a popular science magazine in {{Target_language}}. #### Rules Ensure accurate conveyance of the original text's facts and context during translation. Maintain the original paragraph format and retain technical terms and company abbreviations ",
-  "variables": ["Input_language", "Target_language"],
-  "opening_statement": " Hi. I am your translation assistant. I can help you with any translation and ensure accurate conveyance of information. "
-}
-```
+Step 1: Carefully read the input and understand the structure of the expected output.
+Step 2: Extract relevant parameters from the provided text based on the name and description of the object.
+Step 3: Structure the extracted parameters into a JSON object as specified in <structure>.
+Step 4: Ensure that the list of variable_names is properly formatted and valid. The output should not contain any XML tags. Output an empty list if there is no valid variable name in the input text.

-[EXAMPLE B]
-```json
-{
-  "prompt": "Your task is to review the provided meeting notes and create a concise summary that captures the essential information, focusing on key takeaways and action items assigned to specific individuals or departments during the meeting. Use clear and professional language, and organize the summary in a logical manner using appropriate formatting such as headings, subheadings, and bullet points. Ensure that the summary is easy to understand and provides a comprehensive but succinct overview of the meeting's content, with a particular focus on clearly indicating who is responsible for each action item.",
-  "variables": ["meeting_notes"],
-  "opening_statement": "Hi! I'm your meeting notes summarizer AI. I can help you with any meeting notes and ensure accurate conveyance of information."
-}
-```
+### Structure
+Here is the structure of the expected output; I should always follow the output structure.
+["variable_name_1", "variable_name_2"]

-<< MY INTENDED AUDIENCES >>
-{{audiences}}
+### Input Text
+Inside <text></text> XML tags, there is a text from which I should extract parameters and convert them to a JSON object.
+<text>
+{{INPUT_TEXT}}
+</text>

-<< HOPING TO SOLVE >>
-{{hoping_to_solve}}
+### Answer
+I should always output a valid list. Output nothing other than the list of variable_names. Output an empty list if there is no variable name in the input text.
+"""

-<< OUTPUT >>
-"""
+RULE_CONFIG_STATEMENT_GENERATE_TEMPLATE = """
+<instruction>
+Step 1: Identify the purpose of the chatbot from the variable {{TASK_DESCRIPTION}} and infer the chatbot's tone (e.g., friendly, professional, etc.) to add personality traits.
+Step 2: Create a coherent and engaging opening statement.
+Step 3: Ensure the output is welcoming and clearly explains what the chatbot is designed to do. Do not include any XML tags in the output.
+Please use the same language as the user's input language. If the user uses Chinese, generate the opening statement in Chinese; if the user uses English, generate it in English.
+Example Input:
+Provide customer support for an e-commerce website
+Example Output:
+Welcome! I'm here to assist you with any questions or issues you might have with your shopping experience. Whether you're looking for product information, need help with your order, or have any other inquiries, feel free to ask. I'm friendly, helpful, and ready to support you in any way I can.
+<Task>
+Here is the task description: {{INPUT_TEXT}}
+
+You just need to generate the output
+"""

View File

@@ -262,6 +262,10 @@ You should also complete the text started with ``` but not tell ``` directly.
         :param prompt_messages: prompt messages
         :return: llm response
         """
+        if response.status_code != 200 and response.status_code != HTTPStatus.OK:
+            raise ServiceUnavailableError(
+                response.message
+            )
         # transform assistant message to prompt message
         assistant_prompt_message = AssistantPromptMessage(
             content=response.output.choices[0].message.content,
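
For context on the new guard: `http.HTTPStatus.OK` is an `IntEnum` member that compares equal to the plain integer 200, so the two-sided condition reduces to a single `!= 200` check; a quick standalone sketch:

```python
from http import HTTPStatus

# HTTPStatus.OK == 200, so `!= 200 and != HTTPStatus.OK` is equivalent to `!= 200`.
assert HTTPStatus.OK == 200
```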