2024-01-18 15:46:18 +08:00
|
|
|
|
import axios, { AxiosError } from "axios";
|
|
|
|
|
import { getRandomOffset } from "@/utils/others/quillutils";
|
|
|
|
|
|
|
|
|
|
const xml2js = require("xml2js");
|
|
|
|
|
|
|
|
|
|
/**
 * Shape of the parsed arXiv response object that `extractArxivData` accepts.
 *
 * Everything lives under the single `feed` key; `entry` holds the individual
 * paper records, the remaining members describe the feed itself.
 */
interface ArxivFeed {
  feed: {
    xmlns: string;
    /** One element per paper in the response. */
    entry: ArxivEntry[];
    id: string[];
    link: Record<string, string>[];
    "opensearch:itemsPerPage": Record<string, string>[];
    "opensearch:startIndex": Record<string, string>[];
    "opensearch:totalResults": Record<string, string>[];
    title: Record<string, string>[];
    updated: string[];
  };
}
|
|
|
|
|
|
|
|
|
|
/**
 * A single paper record inside `ArxivFeed.feed.entry`.
 *
 * Text members are arrays of strings (consumers read index 0); the remaining
 * members are arrays of string-to-string maps.
 */
interface ArxivEntry {
  "arxiv:comment": Record<string, string>[];
  "arxiv:primary_category": Record<string, string>[];
  /** Authors of the paper, in feed order. */
  author: Author[];
  category: Record<string, string>[];
  id: string[];
  link: Record<string, string>[];
  published: string[];
  /** Abstract text of the paper. */
  summary: string[];
  title: string[];
  updated: string[];
}
|
|
|
|
|
|
|
|
|
|
/**
 * One author of an `ArxivEntry`.
 */
interface Author {
  /**
   * Author name, wrapped in an array like the other text members of the
   * parsed feed; `extractArxivData` reads the full name from `name[0]`.
   */
  name: string[];
  // NOTE(review): the parsed key and shape of the affiliation are not
  // exercised anywhere in this file - confirm before relying on this member.
  affiliation?: string; // Assuming affiliation might be optional
}
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the arXiv export API query URL.
 *
 * Every caller-supplied string is percent-encoded so that spaces, `&`, `#`
 * or non-ASCII characters in the search text cannot corrupt the request.
 */
function buildArxivQueryUrl(
  query: string,
  start: number,
  maxResults: number,
  sortBy: string,
  sortOrder: string
): string {
  const params = [
    `search_query=${encodeURIComponent(query)}`,
    `start=${start}`,
    `max_results=${maxResults}`,
    `sortBy=${encodeURIComponent(sortBy)}`,
    `sortOrder=${encodeURIComponent(sortOrder)}`,
  ];
  return `https://export.arxiv.org/api/query?${params.join("&")}`;
}

/**
 * Turns whatever was thrown into a short description that is always safe to
 * embed in an error message.
 */
function describeArxivFailure(error: unknown): string {
  if (axios.isAxiosError(error) && error.response) {
    // Only pick the plain fields: the full response object also carries the
    // request and config objects and is not reliably JSON-serializable.
    const { status, statusText, data } = error.response;
    try {
      return JSON.stringify({ status, statusText, data }, null, 2);
    } catch {
      return `${status} ${statusText}`;
    }
  }
  // Network failures, XML parse errors and extraction errors have no
  // `response`; report their message instead of the literal "undefined".
  return error instanceof Error ? error.message : String(error);
}

/**
 * Queries the arXiv export API and returns the extracted paper records.
 *
 * @param query      Raw (not pre-encoded) search expression; it is
 *                   percent-encoded before being placed in the URL.
 * @param maxResults Value sent as `max_results`.
 * @param offset     Value sent as `start`. The default `-1` means "pick one
 *                   via `getRandomOffset`".
 * @param sortBy     Value sent as `sortBy`.
 * @param sortOrder  Value sent as `sortOrder`.
 * @returns Whatever `extractArxivData` produces for the parsed response.
 * @throws Error prefixed with "Arxiv失败" when the request, the XML parsing
 *         or the extraction fails.
 */
async function getArxivPapers(
  query: string,
  maxResults = 2,
  offset = -1,
  sortBy = "submittedDate",
  sortOrder = "descending"
) {
  // Assume about 30 matching records exist, and never let the upper bound
  // for the random start index drop below zero when maxResults exceeds that.
  const maxOffset = Math.max(0, 30 - maxResults);
  // presumably getRandomOffset returns an index bounded by maxOffset - verify
  // against its definition in quillutils.
  const start = offset === -1 ? getRandomOffset(maxOffset) : offset;
  console.log("offset in arxiv", start);
  const url = buildArxivQueryUrl(query, start, maxResults, sortBy, sortOrder);

  try {
    const response = await axios.get(url);
    // xml2js converts the XML payload into a plain object.
    const parsed: ArxivFeed = await xml2js.parseStringPromise(response.data);
    console.log(parsed);
    return extractArxivData(parsed);
  } catch (error: unknown) {
    throw new Error(
      `Arxiv失败(请使用英文并缩短关键词):${describeArxivFailure(error)}`
    );
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Flattens a parsed arXiv feed into plain paper records.
 *
 * Only the first two entries are kept. Each record carries the first element
 * of the entry's `id`, `published`, `title` and `summary` arrays (the latter
 * exposed as `abstract`) plus the list of author names.
 *
 * @param data Parsed feed object.
 * @returns At most two records; an empty array when the feed has no entries.
 */
function extractArxivData(data: ArxivFeed) {
  // The entry list may be missing altogether (for example when nothing
  // matched); treat that as "no papers" instead of crashing on `.slice`.
  const entries = (data.feed?.entry ?? []).slice(0, 2);

  return entries.map((entry: ArxivEntry) => ({
    id: entry.id[0],
    published: entry.published[0],
    title: entry.title[0],
    abstract: entry.summary[0],
    // Tolerate an entry without any author element.
    authors: (entry.author ?? []).map((author) => author.name[0]),
  }));
}
|
|
|
|
|
|
|
|
|
|
export default getArxivPapers;
|