// dify/web/hooks/use-metadata.ts
//
// 393 lines
// 15 KiB
// TypeScript

'use client'
import { useTranslation } from 'react-i18next'
import dayjs from 'dayjs'
import { formatFileSize, formatNumber, formatTime } from '@/utils/format'
import type { DocType } from '@/models/datasets'
/** Kinds of input a sub-field entry may declare through its `inputType` property. */
export type inputType = 'input' | 'select' | 'textarea'

/** Keys of the metadata map: every `DocType` plus the `originInfo` and `technicalParameters` sections. */
export type metadataType = DocType | 'originInfo' | 'technicalParameters'

/** Description of a single sub-field inside one metadata section. */
type SubFieldConfig = {
  /** Text shown as the sub-field's label. */
  label: string
  /** Optional input kind for the sub-field. */
  inputType?: inputType
  field?: string
  /** Optional formatter turning the raw value (and an optional total) into displayable content. */
  render?: (value: any, total?: number) => React.ReactNode | string
}

/** Description of one metadata section: its heading, icon, edit flag and sub-fields. */
type MetadataSection = {
  text: string
  allowEdit?: boolean
  icon?: React.ReactNode
  iconName?: string
  /** Sub-field descriptions keyed by sub-field name. */
  subFieldsMap: Record<string, SubFieldConfig>
}

/** One section description per metadata type. */
type MetadataMap = Record<metadataType, MetadataSection>
// Common i18n key prefix for every sub-field label.
const fieldPrefix = 'datasetDocuments.metadata.field'

/**
 * Hook returning the translated metadata description table.
 *
 * For every metadata type the table holds the section heading (`text`), an
 * optional `iconName`, an optional `allowEdit` flag and a `subFieldsMap` whose
 * entries carry a translated `label` plus, where declared, an `inputType` or a
 * `render` formatter for the raw value.
 *
 * @returns the metadata map, built with the current `t` translation function.
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation()

  // Translated label stored under `<fieldPrefix>.<path>`.
  const labelOf = (path: string): string => t(`${fieldPrefix}.${path}`)

  // Formats a Unix timestamp (seconds, as expected by `dayjs.unix`) with the
  // translated date-time pattern. The pattern is looked up on every call.
  const renderDateTime = (value: number) =>
    dayjs.unix(value).format(t('datasetDocuments.metadata.dateTimeFormat') as string)

  return {
    book: {
      text: t('datasetDocuments.metadata.type.book'),
      iconName: 'bookOpen',
      subFieldsMap: {
        title: { label: labelOf('book.title') },
        language: { label: labelOf('book.language'), inputType: 'select' },
        author: { label: labelOf('book.author') },
        publisher: { label: labelOf('book.publisher') },
        publication_date: { label: labelOf('book.publicationDate') },
        isbn: { label: labelOf('book.ISBN') },
        category: { label: labelOf('book.category'), inputType: 'select' },
      },
    },
    web_page: {
      text: t('datasetDocuments.metadata.type.webPage'),
      iconName: 'globe',
      subFieldsMap: {
        'title': { label: labelOf('webPage.title') },
        'url': { label: labelOf('webPage.url') },
        'language': { label: labelOf('webPage.language'), inputType: 'select' },
        'author/publisher': { label: labelOf('webPage.authorPublisher') },
        'publish_date': { label: labelOf('webPage.publishDate') },
        'topics/keywords': { label: labelOf('webPage.topicsKeywords') },
        'description': { label: labelOf('webPage.description') },
      },
    },
    paper: {
      text: t('datasetDocuments.metadata.type.paper'),
      iconName: 'graduationHat',
      subFieldsMap: {
        'title': { label: labelOf('paper.title') },
        'language': { label: labelOf('paper.language'), inputType: 'select' },
        'author': { label: labelOf('paper.author') },
        'publish_date': { label: labelOf('paper.publishDate') },
        'journal/conference_name': { label: labelOf('paper.journalConferenceName') },
        'volume/issue/page_numbers': { label: labelOf('paper.volumeIssuePage') },
        'doi': { label: labelOf('paper.DOI') },
        'topics/keywords': { label: labelOf('paper.topicsKeywords') },
        'abstract': { label: labelOf('paper.abstract'), inputType: 'textarea' },
      },
    },
    social_media_post: {
      text: t('datasetDocuments.metadata.type.socialMediaPost'),
      iconName: 'atSign',
      subFieldsMap: {
        'platform': { label: labelOf('socialMediaPost.platform') },
        'author/username': { label: labelOf('socialMediaPost.authorUsername') },
        'publish_date': { label: labelOf('socialMediaPost.publishDate') },
        'post_url': { label: labelOf('socialMediaPost.postURL') },
        'topics/tags': { label: labelOf('socialMediaPost.topicsTags') },
      },
    },
    personal_document: {
      text: t('datasetDocuments.metadata.type.personalDocument'),
      iconName: 'file',
      subFieldsMap: {
        'title': { label: labelOf('personalDocument.title') },
        'author': { label: labelOf('personalDocument.author') },
        'creation_date': { label: labelOf('personalDocument.creationDate') },
        'last_modified_date': { label: labelOf('personalDocument.lastModifiedDate') },
        'document_type': { label: labelOf('personalDocument.documentType'), inputType: 'select' },
        'tags/category': { label: labelOf('personalDocument.tagsCategory') },
      },
    },
    business_document: {
      text: t('datasetDocuments.metadata.type.businessDocument'),
      iconName: 'briefcase',
      subFieldsMap: {
        'title': { label: labelOf('businessDocument.title') },
        'author': { label: labelOf('businessDocument.author') },
        'creation_date': { label: labelOf('businessDocument.creationDate') },
        'last_modified_date': { label: labelOf('businessDocument.lastModifiedDate') },
        'document_type': { label: labelOf('businessDocument.documentType'), inputType: 'select' },
        'department/team': { label: labelOf('businessDocument.departmentTeam') },
      },
    },
    im_chat_log: {
      text: t('datasetDocuments.metadata.type.IMChat'),
      iconName: 'messageTextCircle',
      subFieldsMap: {
        'chat_platform': { label: labelOf('IMChat.chatPlatform') },
        'chat_participants/group_name': { label: labelOf('IMChat.chatPartiesGroupName') },
        'start_date': { label: labelOf('IMChat.startDate') },
        'end_date': { label: labelOf('IMChat.endDate') },
        'participants': { label: labelOf('IMChat.participants') },
        'topicsKeywords': { label: labelOf('IMChat.topicsKeywords'), inputType: 'textarea' },
        'fileType': { label: labelOf('IMChat.fileType') },
      },
    },
    wikipedia_entry: {
      text: t('datasetDocuments.metadata.type.wikipediaEntry'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: labelOf('wikipediaEntry.title') },
        'language': { label: labelOf('wikipediaEntry.language'), inputType: 'select' },
        'web_page_url': { label: labelOf('wikipediaEntry.webpageURL') },
        'editor/contributor': { label: labelOf('wikipediaEntry.editorContributor') },
        'last_edit_date': { label: labelOf('wikipediaEntry.lastEditDate') },
        'summary/introduction': { label: labelOf('wikipediaEntry.summaryIntroduction'), inputType: 'textarea' },
      },
    },
    synced_from_notion: {
      text: t('datasetDocuments.metadata.type.notion'),
      allowEdit: false,
      subFieldsMap: {
        'title': { label: labelOf('notion.title') },
        'language': { label: labelOf('notion.lang'), inputType: 'select' },
        'author/creator': { label: labelOf('notion.author') },
        'creation_date': { label: labelOf('notion.createdTime') },
        'last_modified_date': { label: labelOf('notion.lastModifiedTime') },
        'notion_page_link': { label: labelOf('notion.url') },
        'category/tags': { label: labelOf('notion.tag') },
        'description': { label: labelOf('notion.desc') },
      },
    },
    synced_from_github: {
      text: t('datasetDocuments.metadata.type.github'),
      allowEdit: false,
      subFieldsMap: {
        'repository_name': { label: labelOf('github.repoName') },
        'repository_description': { label: labelOf('github.repoDesc') },
        'repository_owner/organization': { label: labelOf('github.repoOwner') },
        'code_filename': { label: labelOf('github.fileName') },
        'code_file_path': { label: labelOf('github.filePath') },
        'programming_language': { label: labelOf('github.programmingLang') },
        'github_link': { label: labelOf('github.url') },
        'open_source_license': { label: labelOf('github.license') },
        'commit_date': { label: labelOf('github.lastCommitTime') },
        'commit_author': { label: labelOf('github.lastCommitAuthor') },
      },
    },
    originInfo: {
      // This section has no heading text of its own.
      text: '',
      allowEdit: false,
      subFieldsMap: {
        'name': { label: labelOf('originInfo.originalFilename') },
        'data_source_info.upload_file.size': {
          label: labelOf('originInfo.originalFileSize'),
          render: value => formatFileSize(value),
        },
        'created_at': {
          label: labelOf('originInfo.uploadDate'),
          render: renderDateTime,
        },
        'completed_at': {
          label: labelOf('originInfo.lastUpdateDate'),
          render: renderDateTime,
        },
        'data_source_type': {
          label: labelOf('originInfo.source'),
          // The raw value is used as the last segment of the translation key.
          render: value => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t('datasetDocuments.metadata.type.technicalParameters'),
      allowEdit: false,
      // NOTE(review): the "characters", "paragraphs" and "tokens" suffixes below
      // are hard-coded English and are not passed through `t`.
      subFieldsMap: {
        'dataset_process_rule.mode': {
          label: labelOf('technicalParameters.segmentSpecification'),
          // Anything other than 'automatic' is shown as "custom".
          render: value => value === 'automatic'
            ? (t('datasetDocuments.embedding.automatic') as string)
            : (t('datasetDocuments.embedding.custom') as string),
        },
        'dataset_process_rule.rules.segmentation.max_tokens': {
          label: labelOf('technicalParameters.segmentLength'),
          render: value => formatNumber(value),
        },
        'average_segment_length': {
          label: labelOf('technicalParameters.avgParagraphLength'),
          render: value => `${formatNumber(value)} characters`,
        },
        'segment_count': {
          label: labelOf('technicalParameters.paragraphs'),
          render: value => `${formatNumber(value)} paragraphs`,
        },
        'hit_count': {
          label: labelOf('technicalParameters.hitCount'),
          render: (value, total) => {
            const hits = value || 0
            // `total` is optional: treat a missing (or otherwise falsy) total as 0
            // so the ratio never prints "undefined" and the division is skipped.
            const denominator = total || 0
            const percent = denominator ? ((hits / denominator) * 100).toFixed(2) : 0
            return `${percent}% (${hits}/${denominator})`
          },
        },
        'indexing_latency': {
          label: labelOf('technicalParameters.embeddingTime'),
          render: value => formatTime(value),
        },
        'tokens': {
          label: labelOf('technicalParameters.embeddedSpend'),
          render: value => `${formatNumber(value)} tokens`,
        },
      },
    },
  }
}
// i18n key prefix under which the language names are stored.
const langPrefix = 'datasetDocuments.metadata.languageMap.'

/**
 * Hook returning the translated language names, keyed by the short language
 * identifiers listed below.
 */
export const useLanguages = () => {
  const { t } = useTranslation()

  // Looks up the translated name for one language identifier.
  const nameOf = (code: string) => t(`${langPrefix}${code}`)

  const languages = {
    zh: nameOf('zh'),
    en: nameOf('en'),
    es: nameOf('es'),
    fr: nameOf('fr'),
    de: nameOf('de'),
    ja: nameOf('ja'),
    ko: nameOf('ko'),
    ru: nameOf('ru'),
    ar: nameOf('ar'),
    pt: nameOf('pt'),
    it: nameOf('it'),
    nl: nameOf('nl'),
    pl: nameOf('pl'),
    sv: nameOf('sv'),
    tr: nameOf('tr'),
    he: nameOf('he'),
    hi: nameOf('hi'),
    da: nameOf('da'),
    fi: nameOf('fi'),
    no: nameOf('no'),
    hu: nameOf('hu'),
    el: nameOf('el'),
    cs: nameOf('cs'),
    th: nameOf('th'),
    id: nameOf('id'),
  }
  return languages
}
// i18n key prefix under which the book category names are stored.
const bookCategoryPrefix = 'datasetDocuments.metadata.categoryMap.book.'

/**
 * Hook returning the translated book category names, keyed by category id.
 */
export const useBookCategories = () => {
  const { t } = useTranslation()

  // Looks up the translated name for one book category id.
  const nameOf = (category: string) => t(`${bookCategoryPrefix}${category}`)

  const categories = {
    fiction: nameOf('fiction'),
    biography: nameOf('biography'),
    history: nameOf('history'),
    science: nameOf('science'),
    technology: nameOf('technology'),
    education: nameOf('education'),
    philosophy: nameOf('philosophy'),
    religion: nameOf('religion'),
    socialSciences: nameOf('socialSciences'),
    art: nameOf('art'),
    travel: nameOf('travel'),
    health: nameOf('health'),
    selfHelp: nameOf('selfHelp'),
    businessEconomics: nameOf('businessEconomics'),
    cooking: nameOf('cooking'),
    childrenYoungAdults: nameOf('childrenYoungAdults'),
    comicsGraphicNovels: nameOf('comicsGraphicNovels'),
    poetry: nameOf('poetry'),
    drama: nameOf('drama'),
    other: nameOf('other'),
  }
  return categories
}
// i18n key prefix under which the personal document category names are stored.
const personalDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.personalDoc.'

/**
 * Hook returning the translated personal document category names, keyed by
 * category id.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation()

  // Looks up the translated name for one personal document category id.
  const nameOf = (category: string) => t(`${personalDocCategoryPrefix}${category}`)

  const categories = {
    notes: nameOf('notes'),
    blogDraft: nameOf('blogDraft'),
    diary: nameOf('diary'),
    researchReport: nameOf('researchReport'),
    bookExcerpt: nameOf('bookExcerpt'),
    schedule: nameOf('schedule'),
    list: nameOf('list'),
    projectOverview: nameOf('projectOverview'),
    photoCollection: nameOf('photoCollection'),
    creativeWriting: nameOf('creativeWriting'),
    codeSnippet: nameOf('codeSnippet'),
    designDraft: nameOf('designDraft'),
    personalResume: nameOf('personalResume'),
    other: nameOf('other'),
  }
  return categories
}
// i18n key prefix under which the business document category names are stored.
const businessDocCategoryPrefix = 'datasetDocuments.metadata.categoryMap.businessDoc.'

/**
 * Hook returning the translated business document category names, keyed by
 * category id.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation()

  // Looks up the translated name for one business document category id.
  const nameOf = (category: string) => t(`${businessDocCategoryPrefix}${category}`)

  const categories = {
    meetingMinutes: nameOf('meetingMinutes'),
    researchReport: nameOf('researchReport'),
    proposal: nameOf('proposal'),
    employeeHandbook: nameOf('employeeHandbook'),
    trainingMaterials: nameOf('trainingMaterials'),
    requirementsDocument: nameOf('requirementsDocument'),
    designDocument: nameOf('designDocument'),
    productSpecification: nameOf('productSpecification'),
    financialReport: nameOf('financialReport'),
    marketAnalysis: nameOf('marketAnalysis'),
    projectPlan: nameOf('projectPlan'),
    teamStructure: nameOf('teamStructure'),
    policiesProcedures: nameOf('policiesProcedures'),
    contractsAgreements: nameOf('contractsAgreements'),
    emailCorrespondence: nameOf('emailCorrespondence'),
    other: nameOf('other'),
  }
  return categories
}