paper-ai-release-24-07-21/components/GetArxiv.tsx

87 lines
2.4 KiB
TypeScript
Raw Permalink Normal View History

2024-01-18 15:46:18 +08:00
import axios, { AxiosError } from "axios";
import { getRandomOffset } from "@/utils/others/quillutils";
const xml2js = require("xml2js");
/**
 * Shape of the object handed to `extractArxivData`, i.e. the result of running
 * the arXiv response body through `xml2js.parseStringPromise`.
 *
 * Only `feed.entry` is read by the code in this file; the remaining fields
 * describe the rest of the parsed feed and are declared for completeness.
 */
interface ArxivFeed {
  feed: {
    /** Namespace value carried by the root element. */
    xmlns: string;
    /** One item per paper in the response. */
    entry: ArxivEntry[];
    id: string[];
    link: Record<string, string>[];
    "opensearch:itemsPerPage": Record<string, string>[];
    "opensearch:startIndex": Record<string, string>[];
    "opensearch:totalResults": Record<string, string>[];
    title: Record<string, string>[];
    updated: string[];
  };
}
/**
 * A single paper inside `ArxivFeed.feed.entry`.
 *
 * Fields are arrays because that is how the parsed XML exposes them in this
 * file: `extractArxivData` always reads the first element (`id[0]`,
 * `published[0]`, `title[0]`, `summary[0]`).
 */
interface ArxivEntry {
  /** Read as `id[0]` for the paper identifier. */
  id: string[];
  /** Read as `title[0]`. */
  title: string[];
  /** Read as `summary[0]` and exposed to callers as `abstract`. */
  summary: string[];
  /** Each author's name is collected into the `authors` list. */
  author: Author[];
  /** Read as `published[0]`. */
  published: string[];
  updated: string[];
  link: Record<string, string>[];
  category: Record<string, string>[];
  "arxiv:primary_category": Record<string, string>[];
  "arxiv:comment": Record<string, string>[];
}
/**
 * One author of an `ArxivEntry`.
 *
 * `name` is an array, like every other parsed element this file reads:
 * `extractArxivData` takes `author.name[0]` as the full author name. Typing it
 * as a plain `string` would make that expression mean "first character".
 */
interface Author {
  name: string[];
  // Assumed to be optional. NOTE(review): not read anywhere in this file, so
  // its key and shape in the parsed feed are unverified — confirm before use.
  affiliation?: string;
}
/**
 * Queries the arXiv export API and returns the matching papers in the
 * simplified form produced by `extractArxivData`.
 *
 * @param query - Search expression sent as `search_query`. It is
 *   percent-encoded here, so pass it raw (not pre-encoded).
 * @param maxResults - Value sent as `max_results`.
 * @param offset - Value sent as `start`. The default sentinel `-1` asks
 *   `getRandomOffset` to pick a start index instead.
 * @param sortBy - Value sent as `sortBy`.
 * @param sortOrder - Value sent as `sortOrder`.
 * @returns The result of `extractArxivData` applied to the parsed feed.
 * @throws Error whose message is the user-facing hint followed by a JSON
 *   description of the failure (HTTP status/body, or the underlying message
 *   for network, XML-parsing and extraction errors).
 */
async function getArxivPapers(
  query: string,
  maxResults = 2,
  offset = -1,
  sortBy = "submittedDate",
  sortOrder = "descending"
) {
  // Random start indices are drawn from an assumed pool of 30 results. Clamp at
  // 0 so a maxResults above 30 never yields a negative upper bound.
  const maxOffset = Math.max(0, 30 - maxResults);
  if (offset === -1) offset = getRandomOffset(maxOffset);
  console.log("offset in arxiv", offset);

  // Encode the caller-supplied values: an unescaped `&`, `#`, `+` or `%` in the
  // query would otherwise corrupt the URL or inject extra parameters.
  const url =
    "https://export.arxiv.org/api/query" +
    `?search_query=${encodeURIComponent(query)}` +
    `&start=${offset}` +
    `&max_results=${maxResults}` +
    `&sortBy=${encodeURIComponent(sortBy)}` +
    `&sortOrder=${encodeURIComponent(sortOrder)}`;

  try {
    const response = await axios.get(url);
    let result = await xml2js.parseStringPromise(response.data);
    // The XML feed is now a plain object; log it for debugging.
    console.log(result);
    // Reduce the parsed feed to the fields callers actually use.
    result = extractArxivData(result);
    return result;
  } catch (error: unknown) {
    // Only HTTP-level failures carry a `response`; network, parsing and
    // extraction errors do not. Serialize a small, plain summary instead of the
    // whole response object, which may be undefined or contain circular
    // references that make JSON.stringify throw and hide the real error.
    const response =
      typeof error === "object" && error !== null && "response" in error
        ? (
            error as {
              response?: {
                status?: number;
                statusText?: string;
                data?: unknown;
              };
            }
          ).response
        : undefined;
    const detail = response
      ? {
          status: response.status,
          statusText: response.statusText,
          data: response.data,
        }
      : { message: error instanceof Error ? error.message : String(error) };
    throw new Error(
      `Arxiv失败请使用英文并缩短关键词:${JSON.stringify(detail, null, 2)}`
    );
  }
}
/**
 * Flattens a parsed arXiv feed into plain paper records.
 *
 * @param data - Parsed feed object (the output of xml2js for the API response).
 * @param maxEntries - Maximum number of entries to keep, counted from the start
 *   of the feed. Defaults to 2, the limit this function has always applied.
 * @returns One `{ id, published, title, abstract, authors }` record per kept
 *   entry; an empty array when the feed contains no entries.
 */
function extractArxivData(data: ArxivFeed, maxEntries = 2) {
  // A feed with zero matches has no <entry> elements, so the parsed object has
  // no `entry` key at all; treat that as an empty list instead of crashing.
  const entries = (data?.feed?.entry ?? []).slice(0, maxEntries);
  return entries.map((entry: ArxivEntry) => ({
    // Every parsed element is an array; the first item is the element's text.
    id: entry.id[0],
    published: entry.published[0],
    title: entry.title[0],
    abstract: entry.summary[0],
    authors: (entry.author ?? []).map((author) => author.name[0]),
  }));
}
// Default export: getArxivPapers is the only symbol this file exports.
export default getArxivPapers;