"use client";
import { useTranslation } from "react-i18next";
import dayjs from "dayjs";
import { formatNumber, formatFileSize, formatTime } from "@/utils/format";
import type { DocType } from "@/models/datasets";

/** Kind of form control a metadata sub-field is edited with. */
export type inputType = "input" | "select" | "textarea";

/**
 * Keys of the metadata map: every `DocType` plus the two extra sections
 * defined in this file (`originInfo` and `technicalParameters`).
 */
export type metadataType = DocType | "originInfo" | "technicalParameters";

/**
 * Shape returned by `useMetadataMap`: one entry per `metadataType`, each with
 * a translated section title and a map of sub-field descriptors.
 */
type MetadataMap = Record<
  metadataType,
  {
    /** Translated section title. */
    text: string;
    /** Set to `false` below for the synced/derived sections. */
    allowEdit?: boolean;
    icon?: React.ReactNode;
    iconName?: string;
    /** Sub-field descriptors keyed by field name / dotted path. */
    subFieldsMap: Record<
      string,
      {
        /** Translated label of the sub-field. */
        label: string;
        inputType?: inputType;
        field?: string;
        /**
         * Optional display formatter. `value` stays `any` because the
         * renderers below receive differently typed raw values.
         */
        render?: (value: any, total?: number) => React.ReactNode | string;
      }
    >;
  }
>;

const fieldPrefix = "datasetDocuments.metadata.field";

/**
 * Builds the translated metadata definition map.
 *
 * Must be called from a React component or hook because it uses
 * `useTranslation`.
 *
 * @returns A map from metadata type to its title, icon name, edit flag and
 *   sub-field descriptors (label, optional input type, optional renderer).
 */
export const useMetadataMap = (): MetadataMap => {
  const { t } = useTranslation();

  // Shared by `created_at` and `completed_at`: formats a unix timestamp
  // (passed to `dayjs.unix`) with the translated date-time format string.
  const renderDateTime = (value: number): string =>
    dayjs.unix(value).format(t("datasetDocuments.metadata.dateTimeFormat") as string);

  return {
    book: {
      text: t("datasetDocuments.metadata.type.book"),
      iconName: "bookOpen",
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.book.title`) },
        language: {
          label: t(`${fieldPrefix}.book.language`),
          inputType: "select",
        },
        author: { label: t(`${fieldPrefix}.book.author`) },
        publisher: { label: t(`${fieldPrefix}.book.publisher`) },
        publication_date: { label: t(`${fieldPrefix}.book.publicationDate`) },
        isbn: { label: t(`${fieldPrefix}.book.ISBN`) },
        category: {
          label: t(`${fieldPrefix}.book.category`),
          inputType: "select",
        },
      },
    },
    web_page: {
      text: t("datasetDocuments.metadata.type.webPage"),
      iconName: "globe",
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.webPage.title`) },
        url: { label: t(`${fieldPrefix}.webPage.url`) },
        language: {
          label: t(`${fieldPrefix}.webPage.language`),
          inputType: "select",
        },
        "author/publisher": { label: t(`${fieldPrefix}.webPage.authorPublisher`) },
        publish_date: { label: t(`${fieldPrefix}.webPage.publishDate`) },
        "topics/keywords": { label: t(`${fieldPrefix}.webPage.topicsKeywords`) },
        description: { label: t(`${fieldPrefix}.webPage.description`) },
      },
    },
    paper: {
      text: t("datasetDocuments.metadata.type.paper"),
      iconName: "graduationHat",
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.paper.title`) },
        language: {
          label: t(`${fieldPrefix}.paper.language`),
          inputType: "select",
        },
        author: { label: t(`${fieldPrefix}.paper.author`) },
        publish_date: { label: t(`${fieldPrefix}.paper.publishDate`) },
        "journal/conference_name": {
          label: t(`${fieldPrefix}.paper.journalConferenceName`),
        },
        "volume/issue/page_numbers": { label: t(`${fieldPrefix}.paper.volumeIssuePage`) },
        doi: { label: t(`${fieldPrefix}.paper.DOI`) },
        "topics/keywords": { label: t(`${fieldPrefix}.paper.topicsKeywords`) },
        abstract: {
          label: t(`${fieldPrefix}.paper.abstract`),
          inputType: "textarea",
        },
      },
    },
    social_media_post: {
      text: t("datasetDocuments.metadata.type.socialMediaPost"),
      iconName: "atSign",
      subFieldsMap: {
        platform: { label: t(`${fieldPrefix}.socialMediaPost.platform`) },
        "author/username": {
          label: t(`${fieldPrefix}.socialMediaPost.authorUsername`),
        },
        publish_date: { label: t(`${fieldPrefix}.socialMediaPost.publishDate`) },
        post_url: { label: t(`${fieldPrefix}.socialMediaPost.postURL`) },
        "topics/tags": { label: t(`${fieldPrefix}.socialMediaPost.topicsTags`) },
      },
    },
    personal_document: {
      text: t("datasetDocuments.metadata.type.personalDocument"),
      iconName: "file",
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.personalDocument.title`) },
        author: { label: t(`${fieldPrefix}.personalDocument.author`) },
        creation_date: {
          label: t(`${fieldPrefix}.personalDocument.creationDate`),
        },
        last_modified_date: {
          label: t(`${fieldPrefix}.personalDocument.lastModifiedDate`),
        },
        document_type: {
          label: t(`${fieldPrefix}.personalDocument.documentType`),
          inputType: "select",
        },
        "tags/category": {
          label: t(`${fieldPrefix}.personalDocument.tagsCategory`),
        },
      },
    },
    business_document: {
      text: t("datasetDocuments.metadata.type.businessDocument"),
      iconName: "briefcase",
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.businessDocument.title`) },
        author: { label: t(`${fieldPrefix}.businessDocument.author`) },
        creation_date: {
          label: t(`${fieldPrefix}.businessDocument.creationDate`),
        },
        last_modified_date: {
          label: t(`${fieldPrefix}.businessDocument.lastModifiedDate`),
        },
        document_type: {
          label: t(`${fieldPrefix}.businessDocument.documentType`),
          inputType: "select",
        },
        "department/team": {
          label: t(`${fieldPrefix}.businessDocument.departmentTeam`),
        },
      },
    },
    im_chat_log: {
      text: t("datasetDocuments.metadata.type.IMChat"),
      iconName: "messageTextCircle",
      subFieldsMap: {
        chat_platform: { label: t(`${fieldPrefix}.IMChat.chatPlatform`) },
        "chat_participants/group_name": {
          label: t(`${fieldPrefix}.IMChat.chatPartiesGroupName`),
        },
        start_date: { label: t(`${fieldPrefix}.IMChat.startDate`) },
        end_date: { label: t(`${fieldPrefix}.IMChat.endDate`) },
        participants: { label: t(`${fieldPrefix}.IMChat.participants`) },
        topicsKeywords: {
          label: t(`${fieldPrefix}.IMChat.topicsKeywords`),
          inputType: "textarea",
        },
        fileType: { label: t(`${fieldPrefix}.IMChat.fileType`) },
      },
    },
    wikipedia_entry: {
      text: t("datasetDocuments.metadata.type.wikipediaEntry"),
      allowEdit: false,
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.wikipediaEntry.title`) },
        language: {
          label: t(`${fieldPrefix}.wikipediaEntry.language`),
          inputType: "select",
        },
        web_page_url: { label: t(`${fieldPrefix}.wikipediaEntry.webpageURL`) },
        "editor/contributor": {
          label: t(`${fieldPrefix}.wikipediaEntry.editorContributor`),
        },
        last_edit_date: {
          label: t(`${fieldPrefix}.wikipediaEntry.lastEditDate`),
        },
        "summary/introduction": {
          label: t(`${fieldPrefix}.wikipediaEntry.summaryIntroduction`),
          inputType: "textarea",
        },
      },
    },
    synced_from_notion: {
      text: t("datasetDocuments.metadata.type.notion"),
      allowEdit: false,
      subFieldsMap: {
        title: { label: t(`${fieldPrefix}.notion.title`) },
        language: { label: t(`${fieldPrefix}.notion.lang`), inputType: "select" },
        "author/creator": { label: t(`${fieldPrefix}.notion.author`) },
        creation_date: { label: t(`${fieldPrefix}.notion.createdTime`) },
        last_modified_date: {
          label: t(`${fieldPrefix}.notion.lastModifiedTime`),
        },
        notion_page_link: { label: t(`${fieldPrefix}.notion.url`) },
        "category/tags": { label: t(`${fieldPrefix}.notion.tag`) },
        description: { label: t(`${fieldPrefix}.notion.desc`) },
      },
    },
    synced_from_github: {
      text: t("datasetDocuments.metadata.type.github"),
      allowEdit: false,
      subFieldsMap: {
        repository_name: { label: t(`${fieldPrefix}.github.repoName`) },
        repository_description: { label: t(`${fieldPrefix}.github.repoDesc`) },
        "repository_owner/organization": { label: t(`${fieldPrefix}.github.repoOwner`) },
        code_filename: { label: t(`${fieldPrefix}.github.fileName`) },
        code_file_path: { label: t(`${fieldPrefix}.github.filePath`) },
        programming_language: { label: t(`${fieldPrefix}.github.programmingLang`) },
        github_link: { label: t(`${fieldPrefix}.github.url`) },
        open_source_license: { label: t(`${fieldPrefix}.github.license`) },
        commit_date: { label: t(`${fieldPrefix}.github.lastCommitTime`) },
        commit_author: {
          label: t(`${fieldPrefix}.github.lastCommitAuthor`),
        },
      },
    },
    originInfo: {
      text: "",
      allowEdit: false,
      subFieldsMap: {
        name: { label: t(`${fieldPrefix}.originInfo.originalFilename`) },
        "data_source_info.upload_file.size": {
          label: t(`${fieldPrefix}.originInfo.originalFileSize`),
          render: (value) => formatFileSize(value),
        },
        created_at: {
          label: t(`${fieldPrefix}.originInfo.uploadDate`),
          render: renderDateTime,
        },
        completed_at: {
          label: t(`${fieldPrefix}.originInfo.lastUpdateDate`),
          render: renderDateTime,
        },
        data_source_type: {
          label: t(`${fieldPrefix}.originInfo.source`),
          // The raw value selects the translation key for the source name.
          render: (value) => t(`datasetDocuments.metadata.source.${value}`),
        },
      },
    },
    technicalParameters: {
      text: t("datasetDocuments.metadata.type.technicalParameters"),
      allowEdit: false,
      subFieldsMap: {
        "dataset_process_rule.mode": {
          label: t(`${fieldPrefix}.technicalParameters.segmentSpecification`),
          // Anything other than 'automatic' is displayed as "custom".
          render: (value) =>
            value === "automatic"
              ? (t("datasetDocuments.embedding.automatic") as string)
              : (t("datasetDocuments.embedding.custom") as string),
        },
        "dataset_process_rule.rules.segmentation.max_tokens": {
          label: t(`${fieldPrefix}.technicalParameters.segmentLength`),
          render: (value) => formatNumber(value),
        },
        // NOTE(review): the unit suffixes below ("characters", "paragraphs",
        // "tokens") are hard-coded English rather than translated.
        average_segment_length: {
          label: t(`${fieldPrefix}.technicalParameters.avgParagraphLength`),
          render: (value) => `${formatNumber(value)} characters`,
        },
        segment_count: {
          label: t(`${fieldPrefix}.technicalParameters.paragraphs`),
          render: (value) => `${formatNumber(value)} paragraphs`,
        },
        hit_count: {
          label: t(`${fieldPrefix}.technicalParameters.hitCount`),
          render: (value, total) => {
            const hits = value || 0;
            // `total` is optional; default it so the output never contains
            // the literal text "undefined".
            const totalCount = total ?? 0;
            // Guard against division by zero: no total means 0%.
            const percentage = totalCount ? ((hits / totalCount) * 100).toFixed(2) : 0;
            return `${percentage}% (${hits}/${totalCount})`;
          },
        },
        indexing_latency: {
          label: t(`${fieldPrefix}.technicalParameters.embeddingTime`),
          render: (value) => formatTime(value),
        },
        tokens: {
          label: t(`${fieldPrefix}.technicalParameters.embeddedSpend`),
          render: (value) => `${formatNumber(value)} tokens`,
        },
      },
    },
  };
};

const langPrefix = "datasetDocuments.metadata.languageMap.";

/**
 * Returns the translated display name for each supported language code.
 *
 * @returns An object keyed by language code (`zh`, `en`, ...) whose values are
 *   the translations found under `datasetDocuments.metadata.languageMap.`.
 */
export const useLanguages = () => {
  const { t } = useTranslation();
  // Translates one language code under the shared key prefix.
  const lang = (code: string) => t(langPrefix + code);

  return {
    zh: lang("zh"),
    en: lang("en"),
    es: lang("es"),
    fr: lang("fr"),
    de: lang("de"),
    ja: lang("ja"),
    ko: lang("ko"),
    ru: lang("ru"),
    ar: lang("ar"),
    pt: lang("pt"),
    it: lang("it"),
    nl: lang("nl"),
    pl: lang("pl"),
    sv: lang("sv"),
    tr: lang("tr"),
    he: lang("he"),
    hi: lang("hi"),
    da: lang("da"),
    fi: lang("fi"),
    no: lang("no"),
    hu: lang("hu"),
    el: lang("el"),
    cs: lang("cs"),
    th: lang("th"),
    id: lang("id"),
  };
};

const bookCategoryPrefix = "datasetDocuments.metadata.categoryMap.book.";

/**
 * Returns the translated display name for each book category.
 *
 * @returns An object keyed by category id whose values are the translations
 *   found under `datasetDocuments.metadata.categoryMap.book.`.
 */
export const useBookCategories = () => {
  const { t } = useTranslation();
  // Translates one category id under the shared key prefix.
  const category = (id: string) => t(bookCategoryPrefix + id);

  return {
    fiction: category("fiction"),
    biography: category("biography"),
    history: category("history"),
    science: category("science"),
    technology: category("technology"),
    education: category("education"),
    philosophy: category("philosophy"),
    religion: category("religion"),
    socialSciences: category("socialSciences"),
    art: category("art"),
    travel: category("travel"),
    health: category("health"),
    selfHelp: category("selfHelp"),
    businessEconomics: category("businessEconomics"),
    cooking: category("cooking"),
    childrenYoungAdults: category("childrenYoungAdults"),
    comicsGraphicNovels: category("comicsGraphicNovels"),
    poetry: category("poetry"),
    drama: category("drama"),
    other: category("other"),
  };
};

const personalDocCategoryPrefix = "datasetDocuments.metadata.categoryMap.personalDoc.";

/**
 * Returns the translated display name for each personal-document category.
 *
 * @returns An object keyed by category id whose values are the translations
 *   found under `datasetDocuments.metadata.categoryMap.personalDoc.`.
 */
export const usePersonalDocCategories = () => {
  const { t } = useTranslation();
  // Translates one category id under the shared key prefix.
  const category = (id: string) => t(personalDocCategoryPrefix + id);

  return {
    notes: category("notes"),
    blogDraft: category("blogDraft"),
    diary: category("diary"),
    researchReport: category("researchReport"),
    bookExcerpt: category("bookExcerpt"),
    schedule: category("schedule"),
    list: category("list"),
    projectOverview: category("projectOverview"),
    photoCollection: category("photoCollection"),
    creativeWriting: category("creativeWriting"),
    codeSnippet: category("codeSnippet"),
    designDraft: category("designDraft"),
    personalResume: category("personalResume"),
    other: category("other"),
  };
};

const businessDocCategoryPrefix = "datasetDocuments.metadata.categoryMap.businessDoc.";

/**
 * Returns the translated display name for each business-document category.
 *
 * @returns An object keyed by category id whose values are the translations
 *   found under `datasetDocuments.metadata.categoryMap.businessDoc.`.
 */
export const useBusinessDocCategories = () => {
  const { t } = useTranslation();
  // Translates one category id under the shared key prefix.
  const category = (id: string) => t(businessDocCategoryPrefix + id);

  return {
    meetingMinutes: category("meetingMinutes"),
    researchReport: category("researchReport"),
    proposal: category("proposal"),
    employeeHandbook: category("employeeHandbook"),
    trainingMaterials: category("trainingMaterials"),
    requirementsDocument: category("requirementsDocument"),
    designDocument: category("designDocument"),
    productSpecification: category("productSpecification"),
    financialReport: category("financialReport"),
    marketAnalysis: category("marketAnalysis"),
    projectPlan: category("projectPlan"),
    teamStructure: category("teamStructure"),
    policiesProcedures: category("policiesProcedures"),
    contractsAgreements: category("contractsAgreements"),
    emailCorrespondence: category("emailCorrespondence"),
    other: category("other"),
  };
};