feat: 可以进行多轮文献查询

2024-02-18 12:10:53 +08:00 · 2024-02-18 12:10:53 +08:00 · 9d799f1736
commit 9d799f1736
parent edfb1c5475
10 changed files with 219 additions and 175 deletions
--- a/.env.production
+++ b/.env.production
@ -5,4 +5,5 @@ NEXT_PUBLIC_OPENAI_API_KEY=sk-ffe19ebe9fa44d00884330ff1c18cf82
 NEXT_PUBLIC_PAPER_URL=/api/paper
 NEXT_PUBLIC_SEMANTIC_API_KEY=hEQvK6ARe84dzDPcMnpzX4n9jfoqztkMfaftPWnb
 NEXT_PUBLIC_PUBMED_API_KEY=057616e7ce6c722f2ae8679e38a8be9b1a09	
-VERCEL_URL=https://www.paperai.life
+VERCEL_URL=https://www.paperai.life
+NODE_ENV=production
--- a/app/store/slices/authSlice.ts
+++ b/app/store/slices/authSlice.ts
@ -19,7 +19,7 @@ const initialState: APIState = {
          2.文献引用：只引用与主题紧密相关的论文。在引用文献时，文末应使用方括号内的数字来标注引用来源，如 [1]。。请确保每个引用在文章中都有其对应的编号，*无需在文章末尾提供参考文献列表*。*每个文献对应的序号只应该出现一次，比如说引用了第一篇文献文中就只能出现一次[1]*。
          3.忽略无关文献：对于与主题无关的论文，请不要包含在您的写作中。只关注对理解和阐述主题有实质性帮助的资料。
          4.来源明确：在文章中，清楚地指出每个引用的具体来源。引用的信息应准确无误，确保读者能够追溯到原始文献。
-          5.使用中文完成回答,不超过三百字
+          5.使用用户所说的语言完成回答，不超过五百字
          6.只能对给出的文献进行引用，坚决不能虚构文献。
          返回格式举例：
          在某个方面，某论文实现了以下突破...[1],在另一篇论文中，研究了...[2]`,
--- a/components/GetArxiv.tsx
+++ b/components/GetArxiv.tsx
@ -38,12 +38,13 @@ interface Author {
 async function getArxivPapers(
  query: string,
  maxResults = 5,
+  offset = -1,
  sortBy = "submittedDate",
  sortOrder = "descending"
 ) {
-  const maxOffset = 30 - maxResults; // 假设总记录数为 100
-  const start = getRandomOffset(maxOffset);
-  const url = `https://export.arxiv.org/api/query?search_query=${query}&start=${start}&max_results=${maxResults}&sortBy=${sortBy}&sortOrder=${sortOrder}`;
+  const maxOffset = 20 - maxResults; // 假设总记录数为 20
+  if (offset === -1) offset = getRandomOffset(maxOffset);
+  const url = `https://export.arxiv.org/api/query?search_query=${query}&start=${offset}&max_results=${maxResults}&sortBy=${sortBy}&sortOrder=${sortOrder}`;

  try {
    const response = await axios.get(url);
--- a/components/GetPubMed
+++ b/components/GetPubMed
@ -10,14 +10,20 @@ type PubMedID = string;
 // 定义idList为PubMedID数组
 type IDList = PubMedID[];

-async function getPubMedPapers(query: string, year: number, limit = 2) {
+async function getPubMedPapers(
+  query: string,
+  year: number,
+  offset = -1,
+  limit = 2
+) {
  try {
    const baseURL =
      "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi";
    const db = "pubmed"; // 设定搜索的数据库为PubMed
    const retMax = limit; // 检索的最大记录数
-    const retStart = getRandomOffset(20 - limit); // 假设每页最多30条，根据需要随机偏移
-    const url = `${baseURL}?db=${db}&term=${query}[Title/Abstract]+AND+2018:3000[Date - Publication]&retMax=${retMax}&retStart=${retStart}&api_key=${process.env.NEXT_PUBLIC_PUBMED_API_KEY}`;
+    const maxOffset = 20 - limit; // 假设总记录数为 20
+    if (offset === -1) offset = getRandomOffset(maxOffset);
+    const url = `${baseURL}?db=${db}&term=${query}[Title/Abstract]+AND+2018:3000[Date - Publication]&retMax=${retMax}&retStart=${offset}&api_key=${process.env.NEXT_PUBLIC_PUBMED_API_KEY}`;
    const response = await axios.get(url, { responseType: "text" });
    console.log(response.data);
    // 解析XML数据
@ -155,9 +161,14 @@ async function getPubMedPaperDetails(idList: IDList) {
 }

 // 示例：使用这些函数
-async function fetchPubMedData(query: string, year: number, limit: number) {
+async function fetchPubMedData(
+  query: string,
+  year: number,
+  limit: number,
+  offset: number
+) {
  try {
-    const idList = await getPubMedPapers(query, year, limit);
+    const idList = await getPubMedPapers(query, year, offset, limit);
    if (idList && idList.length > 0) {
      const paperDetails = await getPubMedPaperDetails(idList);
      console.log("fetchPubMedData", paperDetails); // 处理或显示文章详情
--- a/components/GetSemantic.tsx
+++ b/components/GetSemantic.tsx
@ -15,10 +15,15 @@ interface Paper {
  url: string;
 }

-async function getSemanticPapers(query: string, year: string, limit = 2) {
+async function getSemanticPapers(
+  query: string,
+  year: string,
+  offset = -1,
+  limit = 2
+) {
  try {
    const maxOffset = 20 - limit; // 假设总记录数为 20
-    const offset = getRandomOffset(maxOffset);
+    if (offset === -1) offset = getRandomOffset(maxOffset);
    const url = `https://api.semanticscholar.org/graph/v1/paper/search`;
    const response = await axios.get(url, {
      headers: {
--- a/components/QuillEditor.tsx
+++ b/components/QuillEditor.tsx
@ -101,6 +101,11 @@ const QEditor = ({ lng }) => {
    "gpt语言模型",
    "gpt-4"
  ); // 默认选项
+  const [generatedPaperNumber, setGeneratedPaperNumber] = useLocalStorage(
+    "生成次数",
+    1
+  ); // 初始值设为1
+
  //redux
  const dispatch = useAppDispatch();
  const references = useAppSelector((state) => state.auth.referencesRedux);
@ -231,16 +236,20 @@ const QEditor = ({ lng }) => {
  const handleInputChange = (event: any) => {
    setUserInput(event.target.value);
  };
-
+  // 处理输入generatedPaperNumber变化的函数
+  const handleGeneratedPaperNumberChange = (event: any) => {
+    const newValue = parseInt(event.target.value, 10);
+    setGeneratedPaperNumber(newValue);
+  };
  // 处理AI写作
  const handleAIWrite = async () => {
-    quill.setSelection(cursorPosition, 0); // 将光标移动到原来的位置
+    quill!.setSelection(cursorPosition!, 0); // 将光标移动到原来的位置

    const prompt = "请帮助用户完成论文写作，使用用户所说的语言完成";
    await sendMessageToOpenAI(
      userInput,
-      quill,
-      selectedModel,
+      quill!,
+      selectedModel!,
      apiKey,
      upsreamUrl,
      prompt
@ -248,132 +257,137 @@ const QEditor = ({ lng }) => {
    // 清空input内容
    setUserInput("");
    // 重新获取更新后的内容并更新 Redux store
-    const updatedContent = quill.root.innerHTML;
+    const updatedContent = quill!.root.innerHTML;
    dispatch(setEditorContent(updatedContent));
  };

  // 处理paper2AI
  async function paper2AI(topic: string) {
-    quill.setSelection(cursorPosition, 0); // 将光标移动到原来的位置
-
-    try {
-      if (!topic) {
-        //使用ai提取当前要请求的论文主题
-        const prompt =
-          "As a topic extraction assistant, you can help me extract the current discussion of the paper topic, I will enter the content of the paper, you extract the paper topic , no more than two, Hyphenated query terms yield no matches (replace it with space to find matches) return format is: topic1 topic2";
-        const userMessage = getTextBeforeCursor(quill, 2000);
-        topic = await getTopicFromAI(userMessage, prompt, apiKey);
-        console.log("topic in AI before removeSpecialCharacters", topic);
-        topic = removeSpecialCharacters(topic);
-        topic = topic.split(" ").slice(0, 2).join(" ");
-        //如果超过十个字符就截断
-        if (topic.length > 10) {
-          topic = topic.slice(0, 10);
+    quill!.setSelection(cursorPosition!, 0); // 将光标移动到原来的位置
+    let offset = -1;
+    if (generatedPaperNumber) offset = 0;
+    for (let i = 0; i < generatedPaperNumber!; i++) {
+      try {
+        if (!topic) {
+          //使用ai提取当前要请求的论文主题
+          const prompt =
+            "As a topic extraction assistant, you can help me extract the current discussion of the paper topic, I will enter the content of the paper, you extract the paper topic , no more than two, Hyphenated query terms yield no matches (replace it with space to find matches) return format is: topic1 topic2";
+          const userMessage = getTextBeforeCursor(quill!, 2000);
+          topic = await getTopicFromAI(userMessage, prompt, apiKey);
+          console.log("topic in AI before removeSpecialCharacters", topic);
+          topic = removeSpecialCharacters(topic);
+          topic = topic.split(" ").slice(0, 2).join(" ");
+          //如果超过十个字符就截断
+          if (topic.length > 10) {
+            topic = topic.slice(0, 10);
+          }
        }
-      }
-      console.log("topic in AI", topic);
-      let rawData, dataString, newReferences;
-      if (selectedSource === "arxiv") {
-        rawData = await getArxivPapers(topic);
-        console.log("arxiv rawdata:", rawData);
-        // 将 rawData 转换为引用数组
-        newReferences = rawData.map((entry) => ({
-          url: entry.id,
-          title: entry.title,
-          year: entry.published,
-          author: entry.authors?.slice(0, 3).join(", "),
-        }));
-        dataString = rawData
-          .map((entry) => {
-            return `ID: ${entry.id}\nTime: ${entry.published}\nTitle: ${entry.title}\nSummary: ${entry.summary}\n\n`;
-          })
-          .join("");
-      } else if (selectedSource === "semanticScholar") {
-        rawData = await getSemanticPapers(topic, "2015-2023");
-        // 将 rawData 转换为引用数组
-        newReferences = rawData.map((entry) => ({
-          url: entry.url,
-          title: entry.title,
-          year: entry.year,
-          author: entry.authors?.slice(0, 3).join(", "),
-          venue: entry.venue,
-          journal: formatJournalReference(entry),
-        }));
-        dataString = rawData
-          .map((entry) => {
-            return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
-          })
-          .join("");
-      } else if (selectedSource === "pubmed") {
-        rawData = await fetchPubMedData(topic, 2020, 2);
-        if (!rawData) {
-          throw new Error("未搜索到文献 from PubMed.");
+        console.log("topic in AI", topic);
+        let rawData, dataString, newReferences;
+        if (selectedSource === "arxiv") {
+          rawData = await getArxivPapers(topic);
+          console.log("arxiv rawdata:", rawData);
+          // 将 rawData 转换为引用数组
+          newReferences = rawData.map((entry: any) => ({
+            url: entry.id,
+            title: entry.title,
+            year: entry.published,
+            author: entry.authors?.slice(0, 3).join(", "),
+          }));
+          dataString = rawData
+            .map((entry: any) => {
+              return `ID: ${entry.id}\nTime: ${entry.published}\nTitle: ${entry.title}\nSummary: ${entry.summary}\n\n`;
+            })
+            .join("");
+        } else if (selectedSource === "semanticScholar") {
+          rawData = await getSemanticPapers(topic, "2015-2023", offset);
+          // 将 rawData 转换为引用数组
+          newReferences = rawData.map((entry: any) => ({
+            url: entry.url,
+            title: entry.title,
+            year: entry.year,
+            author: entry.authors?.slice(0, 3).join(", "),
+            venue: entry.venue,
+            journal: formatJournalReference(entry),
+          }));
+          dataString = rawData
+            .map((entry: any) => {
+              return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
+            })
+            .join("");
+        } else if (selectedSource === "pubmed") {
+          rawData = await fetchPubMedData(topic, 2020, offset, 2);
+          if (!rawData) {
+            throw new Error("未搜索到文献 from PubMed.");
+          }
+          newReferences = rawData.map((entry: any) => ({
+            id: entry.id, // 文章的 PubMed ID
+            title: entry.title, // 文章的标题
+            abstract: entry.abstract, // 文章的摘要
+            author: entry.authors?.slice(0, 3).join(", "), // 文章的作者列表，假设为字符串数组
+            year: entry.year, // 文章的发表日期
+            journal: entry.journal, // 文章的发表杂志
+            url: entry.url, // 文章的 URL
+            source: "PubMed", // 指示这些引用来自 PubMed
+          }));
+
+          // 打印或进一步处理 newReferences
+          console.log(newReferences);
+
+          dataString = rawData
+            .map((entry: any) => {
+              return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
+            })
+            .join("");
        }
-        newReferences = rawData.map((entry) => ({
-          id: entry.id, // 文章的 PubMed ID
-          title: entry.title, // 文章的标题
-          abstract: entry.abstract, // 文章的摘要
-          author: entry.authors?.slice(0, 3).join(", "), // 文章的作者列表，假设为字符串数组
-          year: entry.year, // 文章的发表日期
-          journal: entry.journal, // 文章的发表杂志
-          url: entry.url, // 文章的 URL
-          source: "PubMed", // 指示这些引用来自 PubMed
-        }));
-
-        // 打印或进一步处理 newReferences
-        console.log(newReferences);
-
-        dataString = rawData
-          .map((entry) => {
-            return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
+        //在对应的位置添加文献
+        const nearestNumber = getNumberBeforeCursor(quill!);
+        dispatch(
+          addReferencesRedux({
+            references: newReferences,
+            position: nearestNumber,
          })
-          .join("");
-      }
-      //在对应的位置添加文献
-      const nearestNumber = getNumberBeforeCursor(quill);
-      dispatch(
-        addReferencesRedux({
-          references: newReferences,
-          position: nearestNumber,
-        })
-      );
-
-      // 确保搜索到的论文不超过 3000 个字符
-      const trimmedMessage =
-        dataString.length > 3000 ? dataString.slice(0, 3000) : dataString;
-      //slate的方法
-      // const content = `需要完成的论文主题：${topic},  搜索到的论文内容:${trimmedMessage},之前已经完成的内容上下文：${extractText(
-      //   editorValue
-      // )}`;
-      const content = `之前用户已经完成的内容上下文：${getTextBeforeCursor(
-        quill,
-        500
-      )},搜索到的论文内容:${trimmedMessage},需要完成的论文主题：${topic},请根据搜索到的论文内容完成用户的论文`;
-      await sendMessageToOpenAI(
-        content,
-        quill,
-        selectedModel,
-        apiKey,
-        upsreamUrl,
-        systemPrompt
-      );
-      setUserInput("");
-      // 重新获取更新后的内容并更新 Redux store
-      const updatedContent = quill.root.innerHTML;
-      dispatch(setEditorContent(updatedContent));
-      if (isVip) {
-        //在云端同步supabase
-        const data = await submitPaper(
-          supabase,
-          updatedContent,
-          references,
-          paperNumberRedux
        );
+
+        // 确保搜索到的论文不超过 3000 个字符
+        const trimmedMessage =
+          dataString.length > 3000 ? dataString.slice(0, 3000) : dataString;
+        //slate的方法
+        // const content = `需要完成的论文主题：${topic},  搜索到的论文内容:${trimmedMessage},之前已经完成的内容上下文：${extractText(
+        //   editorValue
+        // )}`;
+        const content = `之前用户已经完成的内容上下文：${getTextBeforeCursor(
+          quill!,
+          900
+        )},搜索到的论文内容:${trimmedMessage},需要完成的论文主题：${topic},请根据搜索到的论文内容完成用户的论文`;
+        await sendMessageToOpenAI(
+          content,
+          quill!,
+          selectedModel!,
+          apiKey,
+          upsreamUrl,
+          systemPrompt
+        );
+        setUserInput("");
+        // 重新获取更新后的内容并更新 Redux store
+        const updatedContent = quill!.root.innerHTML;
+        dispatch(setEditorContent(updatedContent));
+        if (isVip) {
+          //在云端同步supabase
+          const data = await submitPaper(
+            supabase,
+            updatedContent,
+            references,
+            paperNumberRedux
+          );
+        }
+        //修改offset使得按照接下来的顺序进行获取文献
+        offset += 2;
+      } catch (error) {
+        // console.error("Error fetching data:", error);
+        // 在处理错误后，再次抛出这个错误
+        throw new Error(`Paper2AI出现错误: ${error}`);
      }
-    } catch (error) {
-      // console.error("Error fetching data:", error);
-      // 在处理错误后，再次抛出这个错误
-      throw new Error(`Paper2AI出现错误: ${error}`);
    }
  }

@ -414,12 +428,18 @@ const QEditor = ({ lng }) => {
        <select
          value={selectedModel}
          onChange={(e) => setSelectedModel(e.target.value)}
-          className=" border border-gray-300 bg-white py-2 px-3 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500"
+          className=" border border-gray-300 bg-white py-2 px-3 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500 "
        >
          <option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
          <option value="gpt-4">gpt-4</option>
          <option value="deepseek-chat">deepseek-chat</option>
        </select>
+        <input
+          type="number"
+          value={generatedPaperNumber}
+          onChange={handleGeneratedPaperNumberChange}
+          className="border border-gray-300 text-gray-700 text-sm p-1 rounded w-16"
+        />
        <button
          onClick={() => formatTextInEditor(quill)} // 假设 updateIndex 是处理更新操作的函数
          className="bg-gray-300 hover:bg-gray-400 text-black font-bold py-2 px-4 rounded"
--- a/components/chatAI.tsx
+++ b/components/chatAI.tsx
@ -1,5 +1,7 @@
 import { Transforms } from "slate";
 import { Editor } from "slate";
+import Quill from "quill";
+
 import { extractText } from "@/utils/others/slateutils";
 import {
  updateBracketNumbersInDeltaKeepSelection,
@ -20,8 +22,8 @@ function isValidApiKey(apiKey: string) {

 const sendMessageToOpenAI = async (
  content: string,
-  editor: Editor,
-  selectedModel: "gpt3.5",
+  editor: Quill,
+  selectedModel: string,
  apiKey: string,
  upsreamUrl: string,
  prompt?: string
@ -54,7 +56,7 @@ const sendMessageToOpenAI = async (
          2.文献引用：只引用与主题紧密相关的论文。在引用文献时，文末应使用方括号内的数字来标注引用来源，如 [1]。。请确保每个引用在文章中都有其对应的编号，*无需在文章末尾提供参考文献列表*。*每个文献对应的序号只应该出现一次，比如说引用了第一篇文献文中就只能出现一次[1]*。
          3.忽略无关文献：对于与主题无关的论文，请不要包含在您的写作中。只关注对理解和阐述主题有实质性帮助的资料。
          4.来源明确：在文章中，清楚地指出每个引用的具体来源。引用的信息应准确无误，确保读者能够追溯到原始文献。
-          5.使用用户所说的语言完成回答,不超过三百字
+          5.使用用户所说的语言完成回答，不超过五百字
          6.只能对给出的文献进行引用，坚决不能虚构文献。
          返回格式举例：
          在某个方面，某论文实现了以下突破...[1],在另一篇论文中，研究了...[2]`,
--- a/sentry.client.config.ts
+++ b/sentry.client.config.ts
@ -4,31 +4,33 @@

 import * as Sentry from "@sentry/nextjs";

-Sentry.init({
-  dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
+if (process.env.NODE_ENV === "production") {
+  Sentry.init({
+    dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",

-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 1,
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 1,

-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,

-  replaysOnErrorSampleRate: 1.0,
+    replaysOnErrorSampleRate: 1.0,

-  // This sets the sample rate to be 10%. You may want this to be 100% while
-  // in development and sample at a lower rate in production
-  replaysSessionSampleRate: 0.1,
+    // This sets the sample rate to be 10%. You may want this to be 100% while
+    // in development and sample at a lower rate in production
+    replaysSessionSampleRate: 0.1,

-  // You can remove this option if you're not planning to use the Sentry Session Replay feature:
-  integrations: [
-    Sentry.replayIntegration({
-      // Additional Replay configuration goes in here, for example:
-      maskAllText: true,
-      blockAllMedia: true,
-    }),
-    Sentry.feedbackIntegration({
-      // Additional SDK configuration goes in here, for example:
-      colorScheme: "light",
-    }),
-  ],
-});
+    // You can remove this option if you're not planning to use the Sentry Session Replay feature:
+    integrations: [
+      Sentry.replayIntegration({
+        // Additional Replay configuration goes in here, for example:
+        maskAllText: true,
+        blockAllMedia: true,
+      }),
+      Sentry.feedbackIntegration({
+        // Additional SDK configuration goes in here, for example:
+        colorScheme: "light",
+      }),
+    ],
+  });
+}
--- a/sentry.edge.config.ts
+++ b/sentry.edge.config.ts
@ -4,13 +4,14 @@
 // https://docs.sentry.io/platforms/javascript/guides/nextjs/

 import * as Sentry from "@sentry/nextjs";
+if (process.env.NODE_ENV === "production") {
+  Sentry.init({
+    dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",

-Sentry.init({
-  dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 1,

-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 1,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}
--- a/sentry.server.config.ts
+++ b/sentry.server.config.ts
@ -3,13 +3,14 @@
 // https://docs.sentry.io/platforms/javascript/guides/nextjs/

 import * as Sentry from "@sentry/nextjs";
+if (process.env.NODE_ENV === "production") {
+  Sentry.init({
+    dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",

-Sentry.init({
-  dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
+    // Adjust this value in production, or use tracesSampler for greater control
+    tracesSampleRate: 1,

-  // Adjust this value in production, or use tracesSampler for greater control
-  tracesSampleRate: 1,
-
-  // Setting this option to true will print useful information to the console while you're setting up Sentry.
-  debug: false,
-});
+    // Setting this option to true will print useful information to the console while you're setting up Sentry.
+    debug: false,
+  });
+}