Add DuckDuckGo News Search and Video Search

2024-11-15 19:22:36 +08:00 · 2024-11-14 01:23:53 -08:00 · 2024-11-14 01:23:53 -08:00 · 11e1720be0
commit 11e1720be0
parent ac0fed6402
5 changed files with 302 additions and 1 deletions
--- a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.py
+++ b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.py
@ -0,0 +1,83 @@
 from typing import Any
 from duckduckgo_search import DDGS
 from core.model_runtime.entities.message_entities import SystemPromptMessage
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
 # Prompt template used by DuckDuckGoNewsSearchTool.summary_results: the
 # `{query}` and `{content}` placeholders are filled in with str.format before
 # the text is sent to the model as a system prompt.
 SUMMARY_PROMPT = """
 User's query: 
 {query}
 Here are the news results:
 {content}
 Please summarize the news in a few sentences.
 """
 class DuckDuckGoNewsSearchTool(BuiltinTool):
    """
    Tool for performing a news search using DuckDuckGo search engine.

    The search results are returned either as one markdown text message
    followed by one JSON message per article, or, when ``require_summary`` is
    set, as a single text message holding a model-generated summary.
    """

    @staticmethod
    def _normalize_timelimit(timelimit: str | None) -> str | None:
        """
        Translate a "Day"/"Week"/"Month"/"Year" option into a one-letter code.

        The tool form offers the long names, while the search call is given
        the short ones. Values that are not one of the long names (including
        codes that are already short) are passed through unchanged.

        :param timelimit: raw value of the ``timelimit`` tool parameter
        :return: the one-letter code, the unchanged value, or None when the
            value is empty
        """
        # NOTE(review): duckduckgo_search documents "d", "w" and "m" for
        # news(); "y" is forwarded on the assumption that it is accepted the
        # same way it is for text search - confirm.
        codes = {"day": "d", "week": "w", "month": "m", "year": "y"}
        if not isinstance(timelimit, str):
            return timelimit
        cleaned = timelimit.strip()
        if not cleaned:
            return None
        return codes.get(cleaned.lower(), cleaned)

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> ToolInvokeMessage | list[ToolInvokeMessage]:
        """
        Run a DuckDuckGo news search and format the results.

        :param user_id: id of the user, forwarded to the model when a summary
            is requested
        :param tool_parameters: reads ``query``, ``timelimit``,
            ``max_results`` and ``require_summary``
        :return: a single text message when ``require_summary`` is truthy,
            otherwise a list with one markdown text message followed by one
            JSON message per article; a one-element list with an explanatory
            text message when the search fails or finds nothing
        """
        query_dict = {
            "keywords": tool_parameters.get("query"),
            "timelimit": self._normalize_timelimit(tool_parameters.get("timelimit")),
            "max_results": tool_parameters.get("max_results"),
            "safesearch": "moderate",
            "region": "wt-wt",
        }
        # Only the search call itself is guarded, so that the failure message
        # always refers to the search.
        try:
            response = list(DDGS().news(**query_dict))
        except Exception as e:
            return [self.create_text_message(f"Error searching news: {str(e)}")]
        if not response:
            return [self.create_text_message("No news found matching your criteria.")]

        if tool_parameters.get("require_summary", False):
            results = "\n".join([f"{res.get('title')}: {res.get('body')}" for res in response])
            results = self.summary_results(user_id=user_id, content=results, query=query_dict["keywords"])
            return self.create_text_message(text=results)

        # Create rich markdown content for each news item; the pieces are
        # collected in a list and joined once at the end.
        sections = ["\n\n"]
        json_result = []
        for res in response:
            # `or` also covers a title that is present but empty.
            sections.append(f"### {res.get('title') or 'Untitled'}\n\n")
            if res.get("date"):
                sections.append(f"**Date:** {res.get('date')}\n\n")
            if res.get("body"):
                sections.append(f"{res.get('body')}\n\n")
            if res.get("source"):
                sections.append(f"*Source: {res.get('source')}*\n\n")
            if res.get("image"):
                sections.append(f"![{res.get('title') or ''}]({res.get('image')})\n\n")
            # Skip the link rather than emit an empty "[Read more]()".
            if res.get("url"):
                sections.append(f"[Read more]({res.get('url')})\n\n")
            sections.append("---\n\n")
            json_result.append(self.create_json_message({
                "title": res.get("title", ""),
                "date": res.get("date", ""),
                "body": res.get("body", ""),
                "url": res.get("url", ""),
                "image": res.get("image", ""),
                "source": res.get("source", ""),
            }))
        return [self.create_text_message("".join(sections))] + json_result

    def summary_results(self, user_id: str, content: str, query: str) -> str:
        """
        Ask the model to summarize the news results.

        :param user_id: id of the user the model is invoked for
        :param content: the news results, as text
        :param query: the user's search query
        :return: the content of the model's reply message
        """
        prompt = SUMMARY_PROMPT.format(query=query, content=content)
        summary = self.invoke_model(
            user_id=user_id,
            prompt_messages=[
                SystemPromptMessage(content=prompt),
            ],
            stop=[],
        )
        return summary.message.content
--- a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.yaml
+++ b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_news.yaml
@ -0,0 +1,71 @@
 identity:
  name: ddgo_news
  author: Assistant
  label:
    en_US: DuckDuckGo News Search
    zh_Hans: DuckDuckGo 新闻搜索
 description:
  human:
    en_US: Perform news searches on DuckDuckGo and get results.
    zh_Hans: 在 DuckDuckGo 上进行新闻搜索并获取结果。
  llm: Perform news searches on DuckDuckGo and get results.
 parameters:
  - name: query
    type: string
    required: true
    label:
      en_US: Query String
      zh_Hans: 查询语句
    human_description:
      en_US: Search Query.
      zh_Hans: 搜索查询语句。
    llm_description: Key words for searching
    form: llm
  - name: max_results
    type: number
    required: true
    default: 5
    label:
      en_US: Max Results
      zh_Hans: 最大结果数量
    human_description:
      en_US: The maximum number of results to return.
      zh_Hans: 最大结果数量
    form: form
  - name: timelimit
    type: select
    required: false
    options:
      - value: Day
        label:
          en_US: Current Day
          zh_Hans: 当天
      - value: Week
        label:
          en_US: Current Week
          zh_Hans: 本周
      - value: Month
        label:
          en_US: Current Month
          zh_Hans: 当月
      - value: Year
        label:
          en_US: Current Year
          zh_Hans: 今年
    label:
      en_US: Result Time Limit
      zh_Hans: 结果时间限制
    human_description:
      en_US: Use when querying results within a specific time range only.
      zh_Hans: 只查询一定时间范围内的结果时使用
    form: form
  - name: require_summary
    type: boolean
    default: false
    label:
      en_US: Require Summary
      zh_Hans: 是否总结
    human_description:
      en_US: Whether to pass the news results to the LLM for summarization.
      zh_Hans: 是否需要将新闻结果传给大模型总结
    form: form 
--- a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.py
+++ b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.py
@ -0,0 +1,61 @@
 from typing import Any
 from duckduckgo_search import DDGS
 from core.tools.entities.tool_entities import ToolInvokeMessage
 from core.tools.tool.builtin_tool import BuiltinTool
 class DuckDuckGoVideoSearchTool(BuiltinTool):
    """
    Tool for performing a video search using DuckDuckGo search engine.

    The results are returned as one text message that mixes markdown with an
    embedded iframe per video, followed by one JSON message per result.
    """

    @staticmethod
    def _normalize_timelimit(timelimit: str | None) -> str | None:
        """
        Translate a "Day"/"Week"/"Month"/"Year" option into a one-letter code.

        The tool form offers the long names, while the search call is given
        the short ones. Values that are not one of the long names (including
        codes that are already short) are passed through unchanged.

        :param timelimit: raw value of the ``timelimit`` tool parameter
        :return: the one-letter code, the unchanged value, or None when the
            value is empty
        """
        # NOTE(review): duckduckgo_search documents "d", "w" and "m" for
        # videos(); "y" is forwarded on the assumption that it is accepted
        # the same way it is for text search - confirm.
        codes = {"day": "d", "week": "w", "month": "m", "year": "y"}
        if not isinstance(timelimit, str):
            return timelimit
        cleaned = timelimit.strip()
        if not cleaned:
            return None
        return codes.get(cleaned.lower(), cleaned)

    def _invoke(self, user_id: str, tool_parameters: dict[str, Any]) -> list[ToolInvokeMessage]:
        """
        Run a DuckDuckGo video search and format the results.

        :param user_id: id of the user invoking the tool (not used here)
        :param tool_parameters: reads ``query``, ``region``, ``safesearch``,
            ``timelimit``, ``resolution``, ``duration``, ``license_videos``
            and ``max_results``
        :return: one text message followed by one JSON message per video, or
            a single explanatory text message when the search fails or finds
            nothing
        """
        # Imported locally so this block does not depend on the module's
        # import section.
        import html

        query_dict = {
            "keywords": tool_parameters.get("query"),
            "region": tool_parameters.get("region", "wt-wt"),
            "safesearch": tool_parameters.get("safesearch", "moderate"),
            "timelimit": self._normalize_timelimit(tool_parameters.get("timelimit")),
            "resolution": tool_parameters.get("resolution"),
            "duration": tool_parameters.get("duration"),
            "license_videos": tool_parameters.get("license_videos"),
            "max_results": tool_parameters.get("max_results"),
        }
        # Remove None values to use API defaults
        query_dict = {k: v for k, v in query_dict.items() if v is not None}
        try:
            response = list(DDGS().videos(**query_dict))
        except Exception as e:
            return [self.create_text_message(f"Error searching videos: {str(e)}")]
        if not response:
            return [self.create_text_message("No videos found matching your criteria.")]

        # Create HTML result with embedded iframes
        sections = ["\n\n"]
        json_result = []
        for res in response:
            # Search results are untrusted text that ends up in rendered
            # HTML, so escape everything interpolated into the output.
            title = html.escape(res.get("title") or "", quote=False)
            embed_url = res.get("embed_url") or ""
            embed_html = ""
            # Only http(s) URLs are embedded; this keeps schemes such as
            # "javascript:" out of the iframe src.
            if embed_url.startswith(("http://", "https://")):
                # Responsive wrapper and iframe instead of fixed dimensions
                embed_html = """
 <div style="position: relative; padding-bottom: 56.25%; height: 0; overflow: hidden; max-width: 100%; border-radius: 8px;">
    <iframe 
        style="position: absolute; top: 0; left: 0; width: 100%; height: 100%;" 
        src="{src}" 
        frameborder="0" 
        allowfullscreen>
    </iframe>
 </div>""".format(
                    src=html.escape(embed_url, quote=True)
                )
            sections.append(f"{title}\n\n")
            sections.append(f"{embed_html}\n\n")
            sections.append("---\n\n")
            json_result.append(self.create_json_message(res))
        return [self.create_text_message("".join(sections))] + json_result
--- a/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.yaml
+++ b/api/core/tools/provider/builtin/duckduckgo/tools/ddgo_video.yaml
@ -0,0 +1,86 @@
 identity:
  name: ddgo_video
  author: Assistant
  label:
    en_US: DuckDuckGo Video Search
    zh_Hans: DuckDuckGo 视频搜索
 description:
  human:
    en_US: Perform video searches on DuckDuckGo and get results with embedded videos.
    zh_Hans: 在 DuckDuckGo 上进行视频搜索并获取可嵌入的视频结果。
  llm: Perform video searches on DuckDuckGo and get results with embedded videos.
 parameters:
  - name: query
    type: string
    required: true
    label:
      en_US: Query String
      zh_Hans: 查询语句
    human_description:
      en_US: Search Query
      zh_Hans: 搜索查询语句。
    llm_description: Key words for searching
    form: llm
  - name: max_results
    type: number
    required: true
    default: 3
    minimum: 1
    maximum: 10
    label:
      en_US: Max Results
      zh_Hans: 最大结果数量
    human_description:
      en_US: The maximum number of results to return (1-10).
      zh_Hans: 最大结果数量（1-10）。
    form: form
  - name: timelimit
    type: select
    required: false
    options:
      - value: Day
        label:
          en_US: Current Day
          zh_Hans: 当天
      - value: Week
        label:
          en_US: Current Week
          zh_Hans: 本周
      - value: Month
        label:
          en_US: Current Month
          zh_Hans: 当月
      - value: Year
        label:
          en_US: Current Year
          zh_Hans: 今年
    label:
      en_US: Result Time Limit
      zh_Hans: 结果时间限制
    human_description:
      en_US: Use when querying results within a specific time range only.
      zh_Hans: 只查询一定时间范围内的结果时使用
    form: form
  - name: duration
    type: select
    required: false
    options:
      - value: short
        label:
          en_US: Short (<4 minutes)
          zh_Hans: 短视频（<4分钟）
      - value: medium
        label:
          en_US: Medium (4-20 minutes)
          zh_Hans: 中等（4-20分钟）
      - value: long
        label:
          en_US: Long (>20 minutes)
          zh_Hans: 长视频（>20分钟）
    label:
      en_US: Video Duration
      zh_Hans: 视频时长
    human_description:
      en_US: Filter videos by duration
      zh_Hans: 按时长筛选视频
    form: form 
--- a/web/app/components/base/markdown.tsx
+++ b/web/app/components/base/markdown.tsx
@ -265,7 +265,7 @@ export function Markdown(props: { content: string; className?: string }) {
            }
          },
        ]}
-        disallowedElements={['script', 'iframe', 'head', 'html', 'meta', 'link', 'style', 'body']}
+        disallowedElements={['script', 'head', 'html', 'meta', 'link', 'style', 'body']}
        components={{
          code: CodeBlock,
          img: Img,