feat: 可以进行多轮文献查询

This commit is contained in:
liuweiqing 2024-02-18 12:10:53 +08:00
parent edfb1c5475
commit 9d799f1736
10 changed files with 219 additions and 175 deletions

View File

@ -5,4 +5,5 @@ NEXT_PUBLIC_OPENAI_API_KEY=sk-ffe19ebe9fa44d00884330ff1c18cf82
NEXT_PUBLIC_PAPER_URL=/api/paper
NEXT_PUBLIC_SEMANTIC_API_KEY=hEQvK6ARe84dzDPcMnpzX4n9jfoqztkMfaftPWnb
NEXT_PUBLIC_PUBMED_API_KEY=057616e7ce6c722f2ae8679e38a8be9b1a09
VERCEL_URL=https://www.paperai.life
VERCEL_URL=https://www.paperai.life
NODE_ENV=production

View File

@ -19,7 +19,7 @@ const initialState: APIState = {
2.使 [1]***[1]*
3.
4.
5.使,
5.使
6.
...[1],...[2]`,

View File

@ -38,12 +38,13 @@ interface Author {
async function getArxivPapers(
query: string,
maxResults = 5,
offset = -1,
sortBy = "submittedDate",
sortOrder = "descending"
) {
const maxOffset = 30 - maxResults; // 假设总记录数为 100
const start = getRandomOffset(maxOffset);
const url = `https://export.arxiv.org/api/query?search_query=${query}&start=${start}&max_results=${maxResults}&sortBy=${sortBy}&sortOrder=${sortOrder}`;
const maxOffset = 20 - maxResults; // 假设总记录数为 20
if (offset === -1) offset = getRandomOffset(maxOffset);
const url = `https://export.arxiv.org/api/query?search_query=${query}&start=${offset}&max_results=${maxResults}&sortBy=${sortBy}&sortOrder=${sortOrder}`;
try {
const response = await axios.get(url);

View File

@ -10,14 +10,20 @@ type PubMedID = string;
// 定义idList为PubMedID数组
type IDList = PubMedID[];
async function getPubMedPapers(query: string, year: number, limit = 2) {
async function getPubMedPapers(
query: string,
year: number,
offset = -1,
limit = 2
) {
try {
const baseURL =
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi";
const db = "pubmed"; // 设定搜索的数据库为PubMed
const retMax = limit; // 检索的最大记录数
const retStart = getRandomOffset(20 - limit); // 假设每页最多30条根据需要随机偏移
const url = `${baseURL}?db=${db}&term=${query}[Title/Abstract]+AND+2018:3000[Date - Publication]&retMax=${retMax}&retStart=${retStart}&api_key=${process.env.NEXT_PUBLIC_PUBMED_API_KEY}`;
const maxOffset = 20 - limit; // 假设总记录数为 20
if (offset === -1) offset = getRandomOffset(maxOffset);
const url = `${baseURL}?db=${db}&term=${query}[Title/Abstract]+AND+2018:3000[Date - Publication]&retMax=${retMax}&retStart=${offset}&api_key=${process.env.NEXT_PUBLIC_PUBMED_API_KEY}`;
const response = await axios.get(url, { responseType: "text" });
console.log(response.data);
// 解析XML数据
@ -155,9 +161,14 @@ async function getPubMedPaperDetails(idList: IDList) {
}
// 示例:使用这些函数
async function fetchPubMedData(query: string, year: number, limit: number) {
async function fetchPubMedData(
query: string,
year: number,
limit: number,
offset: number
) {
try {
const idList = await getPubMedPapers(query, year, limit);
const idList = await getPubMedPapers(query, year, offset, limit);
if (idList && idList.length > 0) {
const paperDetails = await getPubMedPaperDetails(idList);
console.log("fetchPubMedData", paperDetails); // 处理或显示文章详情

View File

@ -15,10 +15,15 @@ interface Paper {
url: string;
}
async function getSemanticPapers(query: string, year: string, limit = 2) {
async function getSemanticPapers(
query: string,
year: string,
offset = -1,
limit = 2
) {
try {
const maxOffset = 20 - limit; // 假设总记录数为 20
const offset = getRandomOffset(maxOffset);
if (offset === -1) offset = getRandomOffset(maxOffset);
const url = `https://api.semanticscholar.org/graph/v1/paper/search`;
const response = await axios.get(url, {
headers: {

View File

@ -101,6 +101,11 @@ const QEditor = ({ lng }) => {
"gpt语言模型",
"gpt-4"
); // 默认选项
const [generatedPaperNumber, setGeneratedPaperNumber] = useLocalStorage(
"生成次数",
1
); // 初始值设为1
//redux
const dispatch = useAppDispatch();
const references = useAppSelector((state) => state.auth.referencesRedux);
@ -231,16 +236,20 @@ const QEditor = ({ lng }) => {
// Keep the userInput state in sync with whatever is typed into the input box.
const handleInputChange = (event: any) => {
  const { value } = event.target;
  setUserInput(value);
};
// Handle changes to the generatedPaperNumber numeric input.
// parseInt returns NaN when the field is cleared or holds non-numeric text.
// Storing NaN would feed `value={NaN}` back into the controlled input and make
// the `i < generatedPaperNumber` loop bound in paper2AI always false, so such
// input is ignored and the previously stored value is kept.
const handleGeneratedPaperNumberChange = (event: any) => {
  const newValue = parseInt(event.target.value, 10);
  if (Number.isNaN(newValue)) return;
  setGeneratedPaperNumber(newValue);
};
// 处理AI写作
const handleAIWrite = async () => {
quill.setSelection(cursorPosition, 0); // 将光标移动到原来的位置
quill!.setSelection(cursorPosition!, 0); // 将光标移动到原来的位置
const prompt = "请帮助用户完成论文写作,使用用户所说的语言完成";
await sendMessageToOpenAI(
userInput,
quill,
selectedModel,
quill!,
selectedModel!,
apiKey,
upsreamUrl,
prompt
@ -248,132 +257,137 @@ const QEditor = ({ lng }) => {
// 清空input内容
setUserInput("");
// 重新获取更新后的内容并更新 Redux store
const updatedContent = quill.root.innerHTML;
const updatedContent = quill!.root.innerHTML;
dispatch(setEditorContent(updatedContent));
};
// 处理paper2AI
async function paper2AI(topic: string) {
quill.setSelection(cursorPosition, 0); // 将光标移动到原来的位置
try {
if (!topic) {
//使用ai提取当前要请求的论文主题
const prompt =
"As a topic extraction assistant, you can help me extract the current discussion of the paper topic, I will enter the content of the paper, you extract the paper topic , no more than two, Hyphenated query terms yield no matches (replace it with space to find matches) return format is: topic1 topic2";
const userMessage = getTextBeforeCursor(quill, 2000);
topic = await getTopicFromAI(userMessage, prompt, apiKey);
console.log("topic in AI before removeSpecialCharacters", topic);
topic = removeSpecialCharacters(topic);
topic = topic.split(" ").slice(0, 2).join(" ");
//如果超过十个字符就截断
if (topic.length > 10) {
topic = topic.slice(0, 10);
quill!.setSelection(cursorPosition!, 0); // 将光标移动到原来的位置
let offset = -1;
if (generatedPaperNumber) offset = 0;
for (let i = 0; i < generatedPaperNumber!; i++) {
try {
if (!topic) {
//使用ai提取当前要请求的论文主题
const prompt =
"As a topic extraction assistant, you can help me extract the current discussion of the paper topic, I will enter the content of the paper, you extract the paper topic , no more than two, Hyphenated query terms yield no matches (replace it with space to find matches) return format is: topic1 topic2";
const userMessage = getTextBeforeCursor(quill!, 2000);
topic = await getTopicFromAI(userMessage, prompt, apiKey);
console.log("topic in AI before removeSpecialCharacters", topic);
topic = removeSpecialCharacters(topic);
topic = topic.split(" ").slice(0, 2).join(" ");
//如果超过十个字符就截断
if (topic.length > 10) {
topic = topic.slice(0, 10);
}
}
}
console.log("topic in AI", topic);
let rawData, dataString, newReferences;
if (selectedSource === "arxiv") {
rawData = await getArxivPapers(topic);
console.log("arxiv rawdata:", rawData);
// 将 rawData 转换为引用数组
newReferences = rawData.map((entry) => ({
url: entry.id,
title: entry.title,
year: entry.published,
author: entry.authors?.slice(0, 3).join(", "),
}));
dataString = rawData
.map((entry) => {
return `ID: ${entry.id}\nTime: ${entry.published}\nTitle: ${entry.title}\nSummary: ${entry.summary}\n\n`;
})
.join("");
} else if (selectedSource === "semanticScholar") {
rawData = await getSemanticPapers(topic, "2015-2023");
// 将 rawData 转换为引用数组
newReferences = rawData.map((entry) => ({
url: entry.url,
title: entry.title,
year: entry.year,
author: entry.authors?.slice(0, 3).join(", "),
venue: entry.venue,
journal: formatJournalReference(entry),
}));
dataString = rawData
.map((entry) => {
return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
})
.join("");
} else if (selectedSource === "pubmed") {
rawData = await fetchPubMedData(topic, 2020, 2);
if (!rawData) {
throw new Error("未搜索到文献 from PubMed.");
console.log("topic in AI", topic);
let rawData, dataString, newReferences;
if (selectedSource === "arxiv") {
rawData = await getArxivPapers(topic);
console.log("arxiv rawdata:", rawData);
// 将 rawData 转换为引用数组
newReferences = rawData.map((entry: any) => ({
url: entry.id,
title: entry.title,
year: entry.published,
author: entry.authors?.slice(0, 3).join(", "),
}));
dataString = rawData
.map((entry: any) => {
return `ID: ${entry.id}\nTime: ${entry.published}\nTitle: ${entry.title}\nSummary: ${entry.summary}\n\n`;
})
.join("");
} else if (selectedSource === "semanticScholar") {
rawData = await getSemanticPapers(topic, "2015-2023", offset);
// 将 rawData 转换为引用数组
newReferences = rawData.map((entry: any) => ({
url: entry.url,
title: entry.title,
year: entry.year,
author: entry.authors?.slice(0, 3).join(", "),
venue: entry.venue,
journal: formatJournalReference(entry),
}));
dataString = rawData
.map((entry: any) => {
return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
})
.join("");
} else if (selectedSource === "pubmed") {
rawData = await fetchPubMedData(topic, 2020, offset, 2);
if (!rawData) {
throw new Error("未搜索到文献 from PubMed.");
}
newReferences = rawData.map((entry: any) => ({
id: entry.id, // 文章的 PubMed ID
title: entry.title, // 文章的标题
abstract: entry.abstract, // 文章的摘要
author: entry.authors?.slice(0, 3).join(", "), // 文章的作者列表,假设为字符串数组
year: entry.year, // 文章的发表日期
journal: entry.journal, // 文章的发表杂志
url: entry.url, // 文章的 URL
source: "PubMed", // 指示这些引用来自 PubMed
}));
// 打印或进一步处理 newReferences
console.log(newReferences);
dataString = rawData
.map((entry: any) => {
return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
})
.join("");
}
newReferences = rawData.map((entry) => ({
id: entry.id, // 文章的 PubMed ID
title: entry.title, // 文章的标题
abstract: entry.abstract, // 文章的摘要
author: entry.authors?.slice(0, 3).join(", "), // 文章的作者列表,假设为字符串数组
year: entry.year, // 文章的发表日期
journal: entry.journal, // 文章的发表杂志
url: entry.url, // 文章的 URL
source: "PubMed", // 指示这些引用来自 PubMed
}));
// 打印或进一步处理 newReferences
console.log(newReferences);
dataString = rawData
.map((entry) => {
return `Time: ${entry.year}\nTitle: ${entry.title}\nSummary: ${entry.abstract}\n\n`;
//在对应的位置添加文献
const nearestNumber = getNumberBeforeCursor(quill!);
dispatch(
addReferencesRedux({
references: newReferences,
position: nearestNumber,
})
.join("");
}
//在对应的位置添加文献
const nearestNumber = getNumberBeforeCursor(quill);
dispatch(
addReferencesRedux({
references: newReferences,
position: nearestNumber,
})
);
// 确保搜索到的论文不超过 3000 个字符
const trimmedMessage =
dataString.length > 3000 ? dataString.slice(0, 3000) : dataString;
//slate的方法
// const content = `需要完成的论文主题:${topic}, 搜索到的论文内容:${trimmedMessage},之前已经完成的内容上下文:${extractText(
// editorValue
// )}`;
const content = `之前用户已经完成的内容上下文:${getTextBeforeCursor(
quill,
500
)},搜索到的论文内容:${trimmedMessage},${topic},`;
await sendMessageToOpenAI(
content,
quill,
selectedModel,
apiKey,
upsreamUrl,
systemPrompt
);
setUserInput("");
// 重新获取更新后的内容并更新 Redux store
const updatedContent = quill.root.innerHTML;
dispatch(setEditorContent(updatedContent));
if (isVip) {
//在云端同步supabase
const data = await submitPaper(
supabase,
updatedContent,
references,
paperNumberRedux
);
// 确保搜索到的论文不超过 3000 个字符
const trimmedMessage =
dataString.length > 3000 ? dataString.slice(0, 3000) : dataString;
//slate的方法
// const content = `需要完成的论文主题:${topic}, 搜索到的论文内容:${trimmedMessage},之前已经完成的内容上下文:${extractText(
// editorValue
// )}`;
const content = `之前用户已经完成的内容上下文:${getTextBeforeCursor(
quill!,
900
)},搜索到的论文内容:${trimmedMessage},${topic},`;
await sendMessageToOpenAI(
content,
quill!,
selectedModel!,
apiKey,
upsreamUrl,
systemPrompt
);
setUserInput("");
// 重新获取更新后的内容并更新 Redux store
const updatedContent = quill!.root.innerHTML;
dispatch(setEditorContent(updatedContent));
if (isVip) {
//在云端同步supabase
const data = await submitPaper(
supabase,
updatedContent,
references,
paperNumberRedux
);
}
//修改offset使得按照接下来的顺序进行获取文献
offset += 2;
} catch (error) {
// console.error("Error fetching data:", error);
// 在处理错误后,再次抛出这个错误
throw new Error(`Paper2AI出现错误: ${error}`);
}
} catch (error) {
// console.error("Error fetching data:", error);
// 在处理错误后,再次抛出这个错误
throw new Error(`Paper2AI出现错误: ${error}`);
}
}
@ -414,12 +428,18 @@ const QEditor = ({ lng }) => {
<select
value={selectedModel}
onChange={(e) => setSelectedModel(e.target.value)}
className=" border border-gray-300 bg-white py-2 px-3 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500"
className=" border border-gray-300 bg-white py-2 px-3 rounded leading-tight focus:outline-none focus:bg-white focus:border-gray-500 "
>
<option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
<option value="gpt-4">gpt-4</option>
<option value="deepseek-chat">deepseek-chat</option>
</select>
<input
type="number"
value={generatedPaperNumber}
onChange={handleGeneratedPaperNumberChange}
className="border border-gray-300 text-gray-700 text-sm p-1 rounded w-16"
/>
<button
onClick={() => formatTextInEditor(quill)} // 假设 updateIndex 是处理更新操作的函数
className="bg-gray-300 hover:bg-gray-400 text-black font-bold py-2 px-4 rounded"

View File

@ -1,5 +1,7 @@
import { Transforms } from "slate";
import { Editor } from "slate";
import Quill from "quill";
import { extractText } from "@/utils/others/slateutils";
import {
updateBracketNumbersInDeltaKeepSelection,
@ -20,8 +22,8 @@ function isValidApiKey(apiKey: string) {
const sendMessageToOpenAI = async (
content: string,
editor: Editor,
selectedModel: "gpt3.5",
editor: Quill,
selectedModel: string,
apiKey: string,
upsreamUrl: string,
prompt?: string
@ -54,7 +56,7 @@ const sendMessageToOpenAI = async (
2.使 [1]***[1]*
3.
4.
5.使,
5.使
6.
...[1],...[2]`,

View File

@ -4,31 +4,33 @@
import * as Sentry from "@sentry/nextjs";
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
if (process.env.NODE_ENV === "production") {
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
replaysOnErrorSampleRate: 1.0,
replaysOnErrorSampleRate: 1.0,
// This sets the sample rate to be 10%. You may want this to be 100% while
// in development and sample at a lower rate in production
replaysSessionSampleRate: 0.1,
// This sets the sample rate to be 10%. You may want this to be 100% while
// in development and sample at a lower rate in production
replaysSessionSampleRate: 0.1,
// You can remove this option if you're not planning to use the Sentry Session Replay feature:
integrations: [
Sentry.replayIntegration({
// Additional Replay configuration goes in here, for example:
maskAllText: true,
blockAllMedia: true,
}),
Sentry.feedbackIntegration({
// Additional SDK configuration goes in here, for example:
colorScheme: "light",
}),
],
});
// You can remove this option if you're not planning to use the Sentry Session Replay feature:
integrations: [
Sentry.replayIntegration({
// Additional Replay configuration goes in here, for example:
maskAllText: true,
blockAllMedia: true,
}),
Sentry.feedbackIntegration({
// Additional SDK configuration goes in here, for example:
colorScheme: "light",
}),
],
});
}

View File

@ -4,13 +4,14 @@
// https://docs.sentry.io/platforms/javascript/guides/nextjs/
import * as Sentry from "@sentry/nextjs";
if (process.env.NODE_ENV === "production") {
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
});
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
});
}

View File

@ -3,13 +3,14 @@
// https://docs.sentry.io/platforms/javascript/guides/nextjs/
import * as Sentry from "@sentry/nextjs";
if (process.env.NODE_ENV === "production") {
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
Sentry.init({
dsn: "https://523c4056ba48d012c62a377dfc49f647@o4506728662564864.ingest.sentry.io/4506728672264192",
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Adjust this value in production, or use tracesSampler for greater control
tracesSampleRate: 1,
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
});
// Setting this option to true will print useful information to the console while you're setting up Sentry.
debug: false,
});
}