From 2fe938b7da43f13a66c94cab37eb5a15111d5e47 Mon Sep 17 00:00:00 2001 From: zxhlyh Date: Wed, 24 Jan 2024 17:51:21 +0800 Subject: [PATCH] fix: knowledge api doc (#2174) --- .../datasets/template/template.en.mdx | 22 ++++++++--------- .../datasets/template/template.zh.mdx | 24 +++++++++---------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/web/app/(commonLayout)/datasets/template/template.en.mdx b/web/app/(commonLayout)/datasets/template/template.en.mdx index 06f3489409..f2b4e604ee 100644 --- a/web/app/(commonLayout)/datasets/template/template.en.mdx +++ b/web/app/(commonLayout)/datasets/template/template.en.mdx @@ -144,23 +144,18 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from ### Request Body - - Source document ID (optional) + + - original_document_id Source document ID (optional) - Used to re-upload the document or modify the document cleaning and segmentation configuration. The missing information is copied from the source document - The source document cannot be an archived document - When original_document_id is passed in, the update operation is performed on behalf of the document. process_rule is a fillable item. If not filled in, the segmentation method of the source document will be used by default - When original_document_id is not passed in, the new operation is performed on behalf of the document, and process_rule is required - - - Files that need to be uploaded. 
- - - Index mode + + - indexing_technique Index mode - high_quality High quality: embedding using embedding model, built as vector database index - economy Economy: Build using inverted index of Keyword Table Index - - - Processing rules + + - process_rule Processing rules - mode (string) Cleaning, segmentation mode, automatic / custom - rules (object) Custom rules (in automatic mode, this field is empty) - pre_processing_rules (array[object]) Preprocessing rules @@ -173,6 +168,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - separator Custom segment identifier, currently only allows one delimiter to be set. Default is \n - max_tokens Maximum length (token) defaults to 1000 + + Files that need to be uploaded. + @@ -180,7 +178,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from title="Request" tag="POST" label="/datasets/{dataset_id}/document/create_by_file" - targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} + targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} > ```bash {{ title: 'cURL' }} curl --location POST 
'${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \ diff --git a/web/app/(commonLayout)/datasets/template/template.zh.mdx b/web/app/(commonLayout)/datasets/template/template.zh.mdx index 2906f9c4e7..11bcd5a760 100644 --- a/web/app/(commonLayout)/datasets/template/template.zh.mdx +++ b/web/app/(commonLayout)/datasets/template/template.zh.mdx @@ -142,25 +142,20 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - ### Request Body + ### Request Body - - 源文档 ID (选填) + + - original_document_id 源文档 ID (选填) - 用于重新上传文档或修改文档清洗、分段配置,缺失的信息从源文档复制 - 源文档不可为归档的文档 - 当传入 original_document_id 时,代表文档进行更新操作,process_rule 为可填项目,不填默认使用源文档的分段方式 - 未传入 original_document_id 时,代表文档进行新增操作,process_rule 为必填 - - - 需要上传的文件。 - - - 索引方式 + + - indexing_technique 索引方式 - high_quality 高质量:使用 embedding 模型进行嵌入,构建为向量数据库索引 - economy 经济:使用 Keyword Table Index 的倒排索引进行构建 - - - 处理规则 + + - process_rule 处理规则 - mode (string) 清洗、分段模式 ,automatic 自动 / custom 自定义 - rules (object) 自定义规则(自动模式下,该字段为空) - pre_processing_rules (array[object]) 预处理规则 @@ -173,6 +168,9 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from - separator 自定义分段标识符,目前仅允许设置一个分隔符。默认为 \n - max_tokens 最大长度 (token) 默认为 1000 + + 需要上传的文件。 + @@ -180,7 +178,7 @@ import { Row, Col, Properties, Property, Heading, SubProperty, Paragraph } from title="Request" tag="POST" label="/datasets/{dataset_id}/document/create_by_file" - targetCode={`curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"name":"Dify","indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} + targetCode={`curl --location POST 
'${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \\\n--header 'Authorization: Bearer {api_key}' \\\n--form 'data="{"indexing_technique":"high_quality","process_rule":{"rules":{"pre_processing_rules":[{"id":"remove_extra_spaces","enabled":true},{"id":"remove_urls_emails","enabled":true}],"segmentation":{"separator":"###","max_tokens":500}},"mode":"custom"}}";type=text/plain' \\\n--form 'file=@"/path/to/file"'`} > ```bash {{ title: 'cURL' }} curl --location POST '${props.apiBaseUrl}/datasets/{dataset_id}/document/create_by_file' \