external knowledge api (#8913)

Co-authored-by: Yi <yxiaoisme@gmail.com>
This commit is contained in:
Jyong 2024-09-30 15:38:43 +08:00 committed by GitHub
parent 77aef9ff1d
commit 9d221a5e19
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
90 changed files with 4623 additions and 1171 deletions

View File

@ -37,7 +37,16 @@ from .auth import activate, data_source_bearer_auth, data_source_oauth, forgot_p
from .billing import billing
# Import datasets controllers
from .datasets import data_source, datasets, datasets_document, datasets_segments, file, hit_testing, website
from .datasets import (
data_source,
datasets,
datasets_document,
datasets_segments,
external,
file,
hit_testing,
website,
)
# Import explore controllers
from .explore import (

View File

@ -49,7 +49,7 @@ class DatasetListApi(Resource):
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
ids = request.args.getlist("ids")
provider = request.args.get("provider", default="vendor")
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
@ -57,7 +57,7 @@ class DatasetListApi(Resource):
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(
page, limit, provider, current_user.current_tenant_id, current_user, search, tag_ids
page, limit, current_user.current_tenant_id, current_user, search, tag_ids
)
# check embedding setting
@ -110,6 +110,26 @@ class DatasetListApi(Resource):
nullable=True,
help="Invalid indexing technique.",
)
parser.add_argument(
"external_knowledge_api_id",
type=str,
nullable=True,
required=False,
)
parser.add_argument(
"provider",
type=str,
nullable=True,
choices=Dataset.PROVIDER_LIST,
required=False,
default="vendor",
)
parser.add_argument(
"external_knowledge_id",
type=str,
nullable=True,
required=False,
)
args = parser.parse_args()
# The role of the current user in the ta table must be admin, owner, or editor, or dataset_operator
@ -123,6 +143,9 @@ class DatasetListApi(Resource):
indexing_technique=args["indexing_technique"],
account=current_user,
permission=DatasetPermissionEnum.ONLY_ME,
provider=args["provider"],
external_knowledge_api_id=args["external_knowledge_api_id"],
external_knowledge_id=args["external_knowledge_id"],
)
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
@ -211,6 +234,33 @@ class DatasetApi(Resource):
)
parser.add_argument("retrieval_model", type=dict, location="json", help="Invalid retrieval model.")
parser.add_argument("partial_member_list", type=list, location="json", help="Invalid parent user list.")
parser.add_argument(
"external_retrieval_model",
type=dict,
required=False,
nullable=True,
location="json",
help="Invalid external retrieval model.",
)
parser.add_argument(
"external_knowledge_id",
type=str,
required=False,
nullable=True,
location="json",
help="Invalid external knowledge id.",
)
parser.add_argument(
"external_knowledge_api_id",
type=str,
required=False,
nullable=True,
location="json",
help="Invalid external knowledge api id.",
)
args = parser.parse_args()
data = request.get_json()

View File

@ -0,0 +1,239 @@
from flask import request
from flask_login import current_user
from flask_restful import Resource, marshal, reqparse
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
import services
from controllers.console import api
from controllers.console.datasets.error import DatasetNameDuplicateError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from fields.dataset_fields import dataset_detail_fields
from libs.login import login_required
from services.dataset_service import DatasetService
from services.external_knowledge_service import ExternalDatasetService
from services.hit_testing_service import HitTestingService
def _validate_name(name):
if not name or len(name) < 1 or len(name) > 100:
raise ValueError("Name must be between 1 to 100 characters.")
return name
def _validate_description_length(description):
if description and len(description) > 400:
raise ValueError("Description cannot exceed 400 characters.")
return description
class ExternalApiTemplateListApi(Resource):
    """Console resource for listing and creating external knowledge API templates."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self):
        """Return one page of external knowledge APIs for the current tenant.

        Reads ``page`` (default 1), ``limit`` (default 20) and an optional
        ``keyword`` from the query string.
        """
        query_args = request.args
        page = query_args.get("page", default=1, type=int)
        limit = query_args.get("limit", default=20, type=int)
        search = query_args.get("keyword", default=None, type=str)

        external_knowledge_apis, total = ExternalDatasetService.get_external_knowledge_apis(
            page, limit, current_user.current_tenant_id, search
        )

        serialized = [item.to_dict() for item in external_knowledge_apis]
        # A full page is treated as the signal that more results may follow.
        page_is_full = len(external_knowledge_apis) == limit
        return {
            "data": serialized,
            "has_more": page_is_full,
            "limit": limit,
            "total": total,
            "page": page,
        }, 200

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create an external knowledge API template from ``name`` and ``settings``.

        The settings are validated by the service before the role check runs.

        Raises:
            Forbidden: If the current user is not a dataset editor.
            DatasetNameDuplicateError: If the service reports a duplicate name.
        """
        parser = reqparse.RequestParser()
        name_options = {
            "nullable": False,
            "required": True,
            "help": "Name is required. Name must be between 1 to 100 characters.",
            "type": _validate_name,
        }
        settings_options = {
            "type": dict,
            "location": "json",
            "nullable": False,
            "required": True,
        }
        parser.add_argument("name", **name_options)
        parser.add_argument("settings", **settings_options)
        args = parser.parse_args()

        ExternalDatasetService.validate_api_list(args["settings"])

        # Only users flagged as dataset editors may create templates.
        if not current_user.is_dataset_editor:
            raise Forbidden()

        tenant_id = current_user.current_tenant_id
        user_id = current_user.id
        try:
            external_knowledge_api = ExternalDatasetService.create_external_knowledge_api(
                tenant_id=tenant_id, user_id=user_id, args=args
            )
        except services.errors.dataset.DatasetNameDuplicateError:
            raise DatasetNameDuplicateError()

        return external_knowledge_api.to_dict(), 201
class ExternalApiTemplateApi(Resource):
    """Console resource for reading, updating and deleting one external knowledge API template."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        """Return the template identified by ``external_knowledge_api_id``.

        Raises:
            NotFound: If the service returns ``None`` for the given id.
        """
        external_knowledge_api_id = str(external_knowledge_api_id)
        # NOTE(review): the lookup passes only the id; confirm the service
        # restricts the result to the caller's tenant.
        external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id)
        if external_knowledge_api is None:
            raise NotFound("API template not found.")

        return external_knowledge_api.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def patch(self, external_knowledge_api_id):
        """Update the template's ``name`` and ``settings``.

        Both arguments are required; ``settings`` is validated by the service
        before the update is applied.
        """
        external_knowledge_api_id = str(external_knowledge_api_id)

        parser = reqparse.RequestParser()
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            # The message names the argument it belongs to, matching the
            # other "name" parsers in this module.
            help="Name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument(
            "settings",
            type=dict,
            location="json",
            nullable=False,
            required=True,
        )
        args = parser.parse_args()
        ExternalDatasetService.validate_api_list(args["settings"])

        # NOTE(review): unlike create and delete, no role check is performed
        # here; confirm whether updates are meant to be open to every member.
        external_knowledge_api = ExternalDatasetService.update_external_knowledge_api(
            tenant_id=current_user.current_tenant_id,
            user_id=current_user.id,
            external_knowledge_api_id=external_knowledge_api_id,
            args=args,
        )

        return external_knowledge_api.to_dict(), 200

    @setup_required
    @login_required
    @account_initialization_required
    def delete(self, external_knowledge_api_id):
        """Delete the template for the current tenant.

        Raises:
            Forbidden: If the user is not an editor, or is a dataset operator.
        """
        external_knowledge_api_id = str(external_knowledge_api_id)

        # Editors may delete; dataset operators are explicitly excluded.
        if not current_user.is_editor or current_user.is_dataset_operator:
            raise Forbidden()

        ExternalDatasetService.delete_external_knowledge_api(current_user.current_tenant_id, external_knowledge_api_id)
        return {"result": "success"}, 200
class ExternalApiUseCheckApi(Resource):
    """Console resource reporting whether an external knowledge API template is in use."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, external_knowledge_api_id):
        """Return the ``is_using`` flag and usage ``count`` reported by the service."""
        api_id = str(external_knowledge_api_id)
        usage = ExternalDatasetService.external_knowledge_api_use_check(api_id)
        # The service hands back a two-item result: (in-use flag, count).
        is_using, count = usage
        return {"is_using": is_using, "count": count}, 200
class ExternalDatasetCreateApi(Resource):
    """Console resource that creates a dataset backed by an external knowledge API."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Create an external dataset and return it marshalled with ``dataset_detail_fields``.

        Raises:
            Forbidden: If the user fails either role check.
            DatasetNameDuplicateError: If the service reports a duplicate name.
        """
        # First gate: the caller must be an editor.
        if not current_user.is_editor:
            raise Forbidden()

        parser = reqparse.RequestParser()
        # Both identifiers of the external source are mandatory JSON strings.
        for required_id in ("external_knowledge_api_id", "external_knowledge_id"):
            parser.add_argument(required_id, type=str, required=True, nullable=False, location="json")
        parser.add_argument(
            "name",
            nullable=False,
            required=True,
            help="name is required. Name must be between 1 to 100 characters.",
            type=_validate_name,
        )
        parser.add_argument("description", type=str, required=False, nullable=True, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        args = parser.parse_args()

        # Second gate, evaluated after parsing: the caller must also be a
        # dataset editor.
        if not current_user.is_dataset_editor:
            raise Forbidden()

        creation_kwargs = {
            "tenant_id": current_user.current_tenant_id,
            "user_id": current_user.id,
            "args": args,
        }
        try:
            dataset = ExternalDatasetService.create_external_dataset(**creation_kwargs)
        except services.errors.dataset.DatasetNameDuplicateError:
            raise DatasetNameDuplicateError()

        return marshal(dataset, dataset_detail_fields), 201
class ExternalKnowledgeHitTestingApi(Resource):
    """Console resource that runs a hit-testing query against an external dataset."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self, dataset_id):
        """Run ``query`` against the dataset's external knowledge source.

        Args:
            dataset_id: Dataset identifier taken from the URL.

        Returns:
            Whatever ``HitTestingService.external_retrieve`` returns.

        Raises:
            NotFound: If no dataset exists for ``dataset_id``.
            Forbidden: If the permission check raises ``NoPermissionError``.
            InternalServerError: If retrieval raises any exception.
        """
        dataset_id_str = str(dataset_id)
        dataset = DatasetService.get_dataset(dataset_id_str)
        if dataset is None:
            raise NotFound("Dataset not found.")

        try:
            DatasetService.check_dataset_permission(dataset, current_user)
        except services.errors.account.NoPermissionError as e:
            # Chain the cause so the original error stays in the traceback.
            raise Forbidden(str(e)) from e

        parser = reqparse.RequestParser()
        parser.add_argument("query", type=str, location="json")
        parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
        args = parser.parse_args()

        HitTestingService.hit_testing_args_check(args)

        try:
            return HitTestingService.external_retrieve(
                dataset=dataset,
                query=args["query"],
                account=current_user,
                external_retrieval_model=args["external_retrieval_model"],
            )
        except Exception as e:
            # API boundary: surface any retrieval failure as a 500 while
            # keeping the underlying exception attached as the cause.
            raise InternalServerError(str(e)) from e
# Route table for the external knowledge console endpoints; resources are
# registered in the order listed.
_EXTERNAL_KNOWLEDGE_ROUTES = (
    (ExternalKnowledgeHitTestingApi, "/datasets/<uuid:dataset_id>/external-hit-testing"),
    (ExternalDatasetCreateApi, "/datasets/external"),
    (ExternalApiTemplateListApi, "/datasets/external-knowledge-api"),
    (ExternalApiTemplateApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>"),
    (ExternalApiUseCheckApi, "/datasets/external-knowledge-api/<uuid:external_knowledge_api_id>/use-check"),
)
for _resource_cls, _url in _EXTERNAL_KNOWLEDGE_ROUTES:
    api.add_resource(_resource_cls, _url)

View File

@ -47,6 +47,7 @@ class HitTestingApi(Resource):
parser = reqparse.RequestParser()
parser.add_argument("query", type=str, location="json")
parser.add_argument("retrieval_model", type=dict, required=False, location="json")
parser.add_argument("external_retrieval_model", type=dict, required=False, location="json")
args = parser.parse_args()
HitTestingService.hit_testing_args_check(args)
@ -57,6 +58,7 @@ class HitTestingApi(Resource):
query=args["query"],
account=current_user,
retrieval_model=args["retrieval_model"],
external_retrieval_model=args["external_retrieval_model"],
limit=10,
)

View File

@ -28,11 +28,11 @@ class DatasetListApi(DatasetApiResource):
page = request.args.get("page", default=1, type=int)
limit = request.args.get("limit", default=20, type=int)
provider = request.args.get("provider", default="vendor")
# provider = request.args.get("provider", default="vendor")
search = request.args.get("keyword", default=None, type=str)
tag_ids = request.args.getlist("tag_ids")
datasets, total = DatasetService.get_datasets(page, limit, provider, tenant_id, current_user, search, tag_ids)
datasets, total = DatasetService.get_datasets(page, limit, tenant_id, current_user, search, tag_ids)
# check embedding setting
provider_manager = ProviderManager()
configurations = provider_manager.get_configurations(tenant_id=current_user.current_tenant_id)
@ -82,6 +82,26 @@ class DatasetListApi(DatasetApiResource):
required=False,
nullable=False,
)
# No default here: when the client omits the id it must stay None rather than
# a placeholder string that would be passed on as if it were a real API id.
parser.add_argument(
    "external_knowledge_api_id",
    type=str,
    nullable=True,
    required=False,
)
parser.add_argument(
"provider",
type=str,
nullable=True,
required=False,
default="vendor",
)
parser.add_argument(
"external_knowledge_id",
type=str,
nullable=True,
required=False,
)
args = parser.parse_args()
try:
@ -91,6 +111,9 @@ class DatasetListApi(DatasetApiResource):
indexing_technique=args["indexing_technique"],
account=current_user,
permission=args["permission"],
provider=args["provider"],
external_knowledge_api_id=args["external_knowledge_api_id"],
external_knowledge_id=args["external_knowledge_id"],
)
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()

View File

@ -59,7 +59,7 @@ class DatasetIndexToolCallbackHandler:
for item in resource:
dataset_retriever_resource = DatasetRetrieverResource(
message_id=self._message_id,
position=item.get("position"),
position=item.get("position") or 0,
dataset_id=item.get("dataset_id"),
dataset_name=item.get("dataset_name"),
document_id=item.get("document_id"),

View File

@ -10,6 +10,7 @@ from core.rag.rerank.constants.rerank_mode import RerankMode
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from models.dataset import Dataset
from services.external_knowledge_service import ExternalDatasetService
default_retrieval_model = {
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
@ -34,6 +35,9 @@ class RetrievalService:
weights: Optional[dict] = None,
):
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if not dataset:
return []
if not dataset or dataset.available_document_count == 0 or dataset.available_segment_count == 0:
return []
all_documents = []
@ -108,6 +112,16 @@ class RetrievalService:
)
return all_documents
@classmethod
def external_retrieve(cls, dataset_id: str, query: str, external_retrieval_model: Optional[dict] = None):
    """Query the external knowledge source bound to a dataset.

    Returns an empty list when no dataset with ``dataset_id`` exists;
    otherwise returns the result of
    ``ExternalDatasetService.fetch_external_knowledge_retrieval``.
    """
    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if dataset:
        return ExternalDatasetService.fetch_external_knowledge_retrieval(
            dataset.tenant_id, dataset_id, query, external_retrieval_model
        )
    return []
@classmethod
def keyword_search(
cls, flask_app: Flask, dataset_id: str, query: str, top_k: int, all_documents: list, exceptions: list

View File

@ -0,0 +1,10 @@
from pydantic import BaseModel
class DocumentContext(BaseModel):
    """
    Model class for document context.

    Pairs a piece of retrieved text with the score used to order it.
    """
    # Text of the retrieved context.
    content: str
    # Relevance score; declared as a required float.
    # NOTE(review): callers in the retrieval code build this from
    # ``metadata.get("score")`` / ``dict.get(..., None)`` — confirm a missing
    # score can never reach this field as None.
    score: float

View File

@ -17,6 +17,8 @@ class Document(BaseModel):
"""
metadata: Optional[dict] = Field(default_factory=dict)
provider: Optional[str] = "dify"
class BaseDocumentTransformer(ABC):
"""Abstract base class for document transformation systems.

View File

@ -28,11 +28,16 @@ class RerankModelRunner:
docs = []
doc_id = []
unique_documents = []
for document in documents:
dify_documents = [item for item in documents if item.provider == "dify"]
external_documents = [item for item in documents if item.provider == "external"]
for document in dify_documents:
if document.metadata["doc_id"] not in doc_id:
doc_id.append(document.metadata["doc_id"])
docs.append(document.page_content)
unique_documents.append(document)
for document in external_documents:
docs.append(document.page_content)
unique_documents.append(document)
documents = unique_documents
@ -46,14 +51,10 @@ class RerankModelRunner:
# format document
rerank_document = Document(
page_content=result.text,
metadata={
"doc_id": documents[result.index].metadata["doc_id"],
"doc_hash": documents[result.index].metadata["doc_hash"],
"document_id": documents[result.index].metadata["document_id"],
"dataset_id": documents[result.index].metadata["dataset_id"],
"score": result.score,
},
metadata=documents[result.index].metadata,
provider=documents[result.index].provider,
)
rerank_document.metadata["score"] = result.score
rerank_documents.append(rerank_document)
return rerank_documents

View File

@ -20,6 +20,7 @@ from core.ops.utils import measure_time
from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.jieba.jieba_keyword_table_handler import JiebaKeywordTableHandler
from core.rag.datasource.retrieval_service import RetrievalService
from core.rag.entities.context_entities import DocumentContext
from core.rag.models.document import Document
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from core.rag.retrieval.router.multi_dataset_function_call_router import FunctionCallMultiDatasetRouter
@ -30,6 +31,7 @@ from core.tools.tool.dataset_retriever.dataset_retriever_tool import DatasetRetr
from extensions.ext_database import db
from models.dataset import Dataset, DatasetQuery, DocumentSegment
from models.dataset import Document as DatasetDocument
from services.external_knowledge_service import ExternalDatasetService
default_retrieval_model = {
"search_method": RetrievalMethod.SEMANTIC_SEARCH.value,
@ -110,7 +112,7 @@ class DatasetRetrieval:
continue
# pass if dataset is not available
if dataset and dataset.available_document_count == 0:
if dataset and dataset.available_document_count == 0 and dataset.provider != "external":
continue
available_datasets.append(dataset)
@ -146,69 +148,93 @@ class DatasetRetrieval:
message_id,
)
document_score_list = {}
for item in all_documents:
if item.metadata.get("score"):
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
dify_documents = [item for item in all_documents if item.provider == "dify"]
external_documents = [item for item in all_documents if item.provider == "external"]
document_context_list = []
index_node_ids = [document.metadata["doc_id"] for document in all_documents]
segments = DocumentSegment.query.filter(
DocumentSegment.dataset_id.in_(dataset_ids),
DocumentSegment.completed_at.isnot(None),
DocumentSegment.status == "completed",
DocumentSegment.enabled == True,
DocumentSegment.index_node_id.in_(index_node_ids),
).all()
retrieval_resource_list = []
# deal with external documents
for item in external_documents:
document_context_list.append(DocumentContext(content=item.page_content, score=item.metadata.get("score")))
source = {
"dataset_id": item.metadata.get("dataset_id"),
"dataset_name": item.metadata.get("dataset_name"),
"document_name": item.metadata.get("title"),
"data_source_type": "external",
"retriever_from": invoke_from.to_source(),
"score": item.metadata.get("score"),
"content": item.page_content,
}
retrieval_resource_list.append(source)
document_score_list = {}
# deal with dify documents
if dify_documents:
for item in dify_documents:
if item.metadata.get("score"):
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
if segments:
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
sorted_segments = sorted(
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
)
for segment in sorted_segments:
if segment.answer:
document_context_list.append(f"question:{segment.get_sign_content()} answer:{segment.answer}")
else:
document_context_list.append(segment.get_sign_content())
if show_retrieve_source:
context_list = []
resource_number = 1
index_node_ids = [document.metadata["doc_id"] for document in dify_documents]
segments = DocumentSegment.query.filter(
DocumentSegment.dataset_id.in_(dataset_ids),
DocumentSegment.status == "completed",
DocumentSegment.enabled == True,
DocumentSegment.index_node_id.in_(index_node_ids),
).all()
if segments:
index_node_id_to_position = {id: position for position, id in enumerate(index_node_ids)}
sorted_segments = sorted(
segments, key=lambda segment: index_node_id_to_position.get(segment.index_node_id, float("inf"))
)
for segment in sorted_segments:
dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
document = DatasetDocument.query.filter(
DatasetDocument.id == segment.document_id,
DatasetDocument.enabled == True,
DatasetDocument.archived == False,
).first()
if dataset and document:
source = {
"position": resource_number,
"dataset_id": dataset.id,
"dataset_name": dataset.name,
"document_id": document.id,
"document_name": document.name,
"data_source_type": document.data_source_type,
"segment_id": segment.id,
"retriever_from": invoke_from.to_source(),
"score": document_score_list.get(segment.index_node_id, None),
}
if segment.answer:
document_context_list.append(
DocumentContext(
content=f"question:{segment.get_sign_content()} answer:{segment.answer}",
score=document_score_list.get(segment.index_node_id, None),
)
)
else:
document_context_list.append(
DocumentContext(
content=segment.get_sign_content(),
score=document_score_list.get(segment.index_node_id, None),
)
)
if show_retrieve_source:
for segment in sorted_segments:
dataset = Dataset.query.filter_by(id=segment.dataset_id).first()
document = DatasetDocument.query.filter(
DatasetDocument.id == segment.document_id,
DatasetDocument.enabled == True,
DatasetDocument.archived == False,
).first()
if dataset and document:
source = {
"dataset_id": dataset.id,
"dataset_name": dataset.name,
"document_id": document.id,
"document_name": document.name,
"data_source_type": document.data_source_type,
"segment_id": segment.id,
"retriever_from": invoke_from.to_source(),
"score": document_score_list.get(segment.index_node_id, None),
}
if invoke_from.to_source() == "dev":
source["hit_count"] = segment.hit_count
source["word_count"] = segment.word_count
source["segment_position"] = segment.position
source["index_node_hash"] = segment.index_node_hash
if segment.answer:
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
else:
source["content"] = segment.content
context_list.append(source)
resource_number += 1
if hit_callback:
hit_callback.return_retriever_resource_info(context_list)
return str("\n".join(document_context_list))
if invoke_from.to_source() == "dev":
source["hit_count"] = segment.hit_count
source["word_count"] = segment.word_count
source["segment_position"] = segment.position
source["index_node_hash"] = segment.index_node_hash
if segment.answer:
source["content"] = f"question:{segment.content} \nanswer:{segment.answer}"
else:
source["content"] = segment.content
retrieval_resource_list.append(source)
if hit_callback and retrieval_resource_list:
hit_callback.return_retriever_resource_info(retrieval_resource_list)
if document_context_list:
document_context_list = sorted(document_context_list, key=lambda x: x.score, reverse=True)
return str("\n".join([document_context.content for document_context in document_context_list]))
return ""
def single_retrieve(
@ -256,36 +282,58 @@ class DatasetRetrieval:
# get retrieval model config
dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
if dataset:
retrieval_model_config = dataset.retrieval_model or default_retrieval_model
# get top k
top_k = retrieval_model_config["top_k"]
# get retrieval method
if dataset.indexing_technique == "economy":
retrieval_method = "keyword_search"
else:
retrieval_method = retrieval_model_config["search_method"]
# get reranking model
reranking_model = (
retrieval_model_config["reranking_model"] if retrieval_model_config["reranking_enable"] else None
)
# get score threshold
score_threshold = 0.0
score_threshold_enabled = retrieval_model_config.get("score_threshold_enabled")
if score_threshold_enabled:
score_threshold = retrieval_model_config.get("score_threshold")
with measure_time() as timer:
results = RetrievalService.retrieve(
retrieval_method=retrieval_method,
dataset_id=dataset.id,
results = []
if dataset.provider == "external":
external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
tenant_id=dataset.tenant_id,
dataset_id=dataset_id,
query=query,
top_k=top_k,
score_threshold=score_threshold,
reranking_model=reranking_model,
reranking_mode=retrieval_model_config.get("reranking_mode", "reranking_model"),
weights=retrieval_model_config.get("weights", None),
external_retrieval_parameters=dataset.retrieval_model,
)
for external_document in external_documents:
    # Wrap each external hit in a Document tagged provider="external".
    # "metadata" may be absent from the external payload; fall back to an
    # empty dict so the item assignments below never operate on None.
    document = Document(
        page_content=external_document.get("content"),
        metadata=external_document.get("metadata") or {},
        provider="external",
    )
    document.metadata["score"] = external_document.get("score")
    document.metadata["title"] = external_document.get("title")
    document.metadata["dataset_id"] = dataset_id
    document.metadata["dataset_name"] = dataset.name
    results.append(document)
else:
retrieval_model_config = dataset.retrieval_model or default_retrieval_model
# get top k
top_k = retrieval_model_config["top_k"]
# get retrieval method
if dataset.indexing_technique == "economy":
retrieval_method = "keyword_search"
else:
retrieval_method = retrieval_model_config["search_method"]
# get reranking model
reranking_model = (
retrieval_model_config["reranking_model"]
if retrieval_model_config["reranking_enable"]
else None
)
# get score threshold
score_threshold = 0.0
score_threshold_enabled = retrieval_model_config.get("score_threshold_enabled")
if score_threshold_enabled:
score_threshold = retrieval_model_config.get("score_threshold")
with measure_time() as timer:
results = RetrievalService.retrieve(
retrieval_method=retrieval_method,
dataset_id=dataset.id,
query=query,
top_k=top_k,
score_threshold=score_threshold,
reranking_model=reranking_model,
reranking_mode=retrieval_model_config.get("reranking_mode", "reranking_model"),
weights=retrieval_model_config.get("weights", None),
)
self._on_query(query, [dataset_id], app_id, user_from, user_id)
if results:
@ -356,7 +404,8 @@ class DatasetRetrieval:
self, documents: list[Document], message_id: Optional[str] = None, timer: Optional[dict] = None
) -> None:
"""Handle retrieval end."""
for document in documents:
dify_documents = [document for document in documents if document.provider == "dify"]
for document in dify_documents:
query = db.session.query(DocumentSegment).filter(
DocumentSegment.index_node_id == document.metadata["doc_id"]
)
@ -409,35 +458,54 @@ class DatasetRetrieval:
if not dataset:
return []
# get retrieval model , if the model is not setting , using default
retrieval_model = dataset.retrieval_model or default_retrieval_model
if dataset.indexing_technique == "economy":
# use keyword table query
documents = RetrievalService.retrieve(
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=top_k
if dataset.provider == "external":
external_documents = ExternalDatasetService.fetch_external_knowledge_retrieval(
tenant_id=dataset.tenant_id,
dataset_id=dataset_id,
query=query,
external_retrieval_parameters=dataset.retrieval_model,
)
if documents:
all_documents.extend(documents)
else:
if top_k > 0:
# retrieval source
documents = RetrievalService.retrieve(
retrieval_method=retrieval_model["search_method"],
dataset_id=dataset.id,
query=query,
top_k=retrieval_model.get("top_k") or 2,
score_threshold=retrieval_model.get("score_threshold", 0.0)
if retrieval_model["score_threshold_enabled"]
else 0.0,
reranking_model=retrieval_model.get("reranking_model", None)
if retrieval_model["reranking_enable"]
else None,
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
weights=retrieval_model.get("weights", None),
for external_document in external_documents:
    # Wrap each external hit in a Document tagged provider="external".
    # "metadata" may be absent from the external payload; fall back to an
    # empty dict so the item assignments below never operate on None.
    document = Document(
        page_content=external_document.get("content"),
        metadata=external_document.get("metadata") or {},
        provider="external",
    )
    document.metadata["score"] = external_document.get("score")
    document.metadata["title"] = external_document.get("title")
    document.metadata["dataset_id"] = dataset_id
    document.metadata["dataset_name"] = dataset.name
    all_documents.append(document)
else:
# get retrieval model , if the model is not setting , using default
retrieval_model = dataset.retrieval_model or default_retrieval_model
all_documents.extend(documents)
if dataset.indexing_technique == "economy":
# use keyword table query
documents = RetrievalService.retrieve(
retrieval_method="keyword_search", dataset_id=dataset.id, query=query, top_k=top_k
)
if documents:
all_documents.extend(documents)
else:
if top_k > 0:
# retrieval source
documents = RetrievalService.retrieve(
retrieval_method=retrieval_model["search_method"],
dataset_id=dataset.id,
query=query,
top_k=retrieval_model.get("top_k") or 2,
score_threshold=retrieval_model.get("score_threshold", 0.0)
if retrieval_model["score_threshold_enabled"]
else 0.0,
reranking_model=retrieval_model.get("reranking_model", None)
if retrieval_model["reranking_enable"]
else None,
reranking_mode=retrieval_model.get("reranking_mode") or "reranking_model",
weights=retrieval_model.get("weights", None),
)
all_documents.extend(documents)
def to_dataset_retriever_tool(
self,

View File

@ -156,16 +156,34 @@ class KnowledgeRetrievalNode(BaseNode):
weights,
node_data.multiple_retrieval_config.reranking_enable,
)
context_list = []
if all_documents:
dify_documents = [item for item in all_documents if item.provider == "dify"]
external_documents = [item for item in all_documents if item.provider == "external"]
retrieval_resource_list = []
# deal with external documents
for item in external_documents:
source = {
"metadata": {
"_source": "knowledge",
"dataset_id": item.metadata.get("dataset_id"),
"dataset_name": item.metadata.get("dataset_name"),
"document_name": item.metadata.get("title"),
"data_source_type": "external",
"retriever_from": "workflow",
"score": item.metadata.get("score"),
},
"title": item.metadata.get("title"),
"content": item.page_content,
}
retrieval_resource_list.append(source)
document_score_list = {}
# deal with dify documents
if dify_documents:
document_score_list = {}
page_number_list = {}
for item in all_documents:
for item in dify_documents:
if item.metadata.get("score"):
document_score_list[item.metadata["doc_id"]] = item.metadata["score"]
index_node_ids = [document.metadata["doc_id"] for document in all_documents]
index_node_ids = [document.metadata["doc_id"] for document in dify_documents]
segments = DocumentSegment.query.filter(
DocumentSegment.dataset_id.in_(dataset_ids),
DocumentSegment.completed_at.isnot(None),
@ -186,13 +204,10 @@ class KnowledgeRetrievalNode(BaseNode):
Document.enabled == True,
Document.archived == False,
).first()
resource_number = 1
if dataset and document:
source = {
"metadata": {
"_source": "knowledge",
"position": resource_number,
"dataset_id": dataset.id,
"dataset_name": dataset.name,
"document_id": document.id,
@ -212,9 +227,14 @@ class KnowledgeRetrievalNode(BaseNode):
source["content"] = f"question:{segment.get_sign_content()} \nanswer:{segment.answer}"
else:
source["content"] = segment.get_sign_content()
context_list.append(source)
resource_number += 1
return context_list
retrieval_resource_list.append(source)
if retrieval_resource_list:
    # Each entry keeps its score under the nested "metadata" dict, so the sort
    # key must read it from there; a missing or None score sorts as 0.0 so the
    # keys always stay comparable.
    retrieval_resource_list = sorted(
        retrieval_resource_list,
        key=lambda x: x["metadata"].get("score") or 0.0,
        reverse=True,
    )
    # Number the entries 1..n in their final, score-descending order.
    for position, item in enumerate(retrieval_resource_list, start=1):
        item["metadata"]["position"] = position
return retrieval_resource_list
@classmethod
def _extract_variable_selector_to_variable_mapping(

View File

@ -38,9 +38,20 @@ dataset_retrieval_model_fields = {
"score_threshold_enabled": fields.Boolean,
"score_threshold": fields.Float,
}
# Marshalling schema for a dataset's external retrieval settings.
external_retrieval_model_fields = {
    "top_k": fields.Integer,
    "score_threshold": fields.Float,
}
tag_fields = {"id": fields.String, "name": fields.String, "type": fields.String}
# Marshalling schema describing the external knowledge source and the API
# template a dataset is bound to; every value is serialized as a string.
external_knowledge_info_fields = {
    "external_knowledge_id": fields.String,
    "external_knowledge_api_id": fields.String,
    "external_knowledge_api_name": fields.String,
    "external_knowledge_api_endpoint": fields.String,
}
dataset_detail_fields = {
"id": fields.String,
"name": fields.String,
@ -61,6 +72,8 @@ dataset_detail_fields = {
"embedding_available": fields.Boolean,
"retrieval_model_dict": fields.Nested(dataset_retrieval_model_fields),
"tags": fields.List(fields.Nested(tag_fields)),
"external_knowledge_info": fields.Nested(external_knowledge_info_fields),
"external_retrieval_model": fields.Nested(external_retrieval_model_fields, allow_null=True),
}
dataset_query_detail_fields = {

View File

@ -0,0 +1,11 @@
from flask_restful import fields
from libs.helper import TimestampField
# Marshalling fields for a single external knowledge API record.
# NOTE(review): the key is "setting" while the ExternalKnowledgeApis model exposes
# `settings` / `settings_dict` — confirm the marshalled object really carries a
# `setting` attribute, otherwise this field will always serialize as null.
external_knowledge_api_query_detail_fields = {
    "id": fields.String,
    "name": fields.String,
    "setting": fields.String,
    "created_by": fields.String,
    "created_at": TimestampField,
}

View File

@ -0,0 +1,48 @@
"""update-retrieval-resource
Revision ID: 6af6a521a53e
Revises: d57ba9ebb251
Create Date: 2024-09-24 09:22:43.570120
"""
from alembic import op
import models as models
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '6af6a521a53e'
down_revision = 'd57ba9ebb251'
branch_labels = None
depends_on = None
def upgrade():
    """Make document_id, data_source_type and segment_id nullable on dataset_retriever_resources."""
    # ### commands auto generated by Alembic - please adjust! ###
    relaxed_columns = (
        ('document_id', sa.UUID()),
        ('data_source_type', sa.TEXT()),
        ('segment_id', sa.UUID()),
    )
    with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op:
        for column_name, column_type in relaxed_columns:
            batch_op.alter_column(column_name,
                                  existing_type=column_type,
                                  nullable=True)
    # ### end Alembic commands ###
def downgrade():
    """Restore NOT NULL on segment_id, data_source_type and document_id of dataset_retriever_resources."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Reverse order of upgrade().
    restored_columns = (
        ('segment_id', sa.UUID()),
        ('data_source_type', sa.TEXT()),
        ('document_id', sa.UUID()),
    )
    with op.batch_alter_table('dataset_retriever_resources', schema=None) as batch_op:
        for column_name, column_type in restored_columns:
            batch_op.alter_column(column_name,
                                  existing_type=column_type,
                                  nullable=False)
    # ### end Alembic commands ###

View File

@ -0,0 +1,73 @@
"""external_knowledge_api
Revision ID: 33f5fac87f29
Revises: 6af6a521a53e
Create Date: 2024-09-25 04:34:57.249436
"""
from alembic import op
import models as models
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
# revision identifiers, used by Alembic.
revision = '33f5fac87f29'
down_revision = '6af6a521a53e'
branch_labels = None
depends_on = None
def upgrade():
    """Create the external_knowledge_apis and external_knowledge_bindings tables and their indexes."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        'external_knowledge_apis',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('name', sa.String(length=255), nullable=False),
        sa.Column('description', sa.String(length=255), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('settings', sa.Text(), nullable=True),
        sa.Column('created_by', models.types.StringUUID(), nullable=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='external_knowledge_apis_pkey'),
    )
    api_indexes = (
        ('external_knowledge_apis_name_idx', ['name']),
        ('external_knowledge_apis_tenant_idx', ['tenant_id']),
    )
    with op.batch_alter_table('external_knowledge_apis', schema=None) as batch_op:
        for index_name, index_columns in api_indexes:
            batch_op.create_index(index_name, index_columns, unique=False)

    op.create_table(
        'external_knowledge_bindings',
        sa.Column('id', models.types.StringUUID(), server_default=sa.text('uuid_generate_v4()'), nullable=False),
        sa.Column('tenant_id', models.types.StringUUID(), nullable=False),
        sa.Column('external_knowledge_api_id', models.types.StringUUID(), nullable=False),
        sa.Column('dataset_id', models.types.StringUUID(), nullable=False),
        sa.Column('external_knowledge_id', sa.Text(), nullable=False),
        sa.Column('created_by', models.types.StringUUID(), nullable=False),
        sa.Column('created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False),
        sa.Column('updated_by', models.types.StringUUID(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP(0)'), nullable=False),
        sa.PrimaryKeyConstraint('id', name='external_knowledge_bindings_pkey'),
    )
    binding_indexes = (
        ('external_knowledge_bindings_dataset_idx', ['dataset_id']),
        ('external_knowledge_bindings_external_knowledge_api_idx', ['external_knowledge_api_id']),
        ('external_knowledge_bindings_external_knowledge_idx', ['external_knowledge_id']),
        ('external_knowledge_bindings_tenant_idx', ['tenant_id']),
    )
    with op.batch_alter_table('external_knowledge_bindings', schema=None) as batch_op:
        for index_name, index_columns in binding_indexes:
            batch_op.create_index(index_name, index_columns, unique=False)
    # ### end Alembic commands ###
def downgrade():
    """Drop the external knowledge tables and their indexes, bindings first."""
    # ### commands auto generated by Alembic - please adjust! ###
    # (table, indexes to drop) in the reverse order of upgrade().
    teardown_plan = (
        (
            'external_knowledge_bindings',
            (
                'external_knowledge_bindings_tenant_idx',
                'external_knowledge_bindings_external_knowledge_idx',
                'external_knowledge_bindings_external_knowledge_api_idx',
                'external_knowledge_bindings_dataset_idx',
            ),
        ),
        (
            'external_knowledge_apis',
            (
                'external_knowledge_apis_tenant_idx',
                'external_knowledge_apis_name_idx',
            ),
        ),
    )
    for table_name, index_names in teardown_plan:
        with op.batch_alter_table(table_name, schema=None) as batch_op:
            for index_name in index_names:
                batch_op.drop_index(index_name)
        op.drop_table(table_name)
    # ### end Alembic commands ###

View File

@ -1,4 +1,4 @@
"""add-dataset-retrival-model
"""add-dataset-retrieval-model
Revision ID: fca025d3b60f
Revises: b3a09c049e8e

View File

@ -38,6 +38,7 @@ class Dataset(db.Model):
)
INDEXING_TECHNIQUE_LIST = ["high_quality", "economy", None]
PROVIDER_LIST = ["vendor", "external", None]
id = db.Column(StringUUID, server_default=db.text("uuid_generate_v4()"))
tenant_id = db.Column(StringUUID, nullable=False)
@ -71,6 +72,14 @@ class Dataset(db.Model):
def index_struct_dict(self):
return json.loads(self.index_struct) if self.index_struct else None
@property
def external_retrieval_model(self):
    """Return the stored retrieval settings, falling back to top_k=2 / score_threshold=0.0 when unset."""
    configured = self.retrieval_model
    if configured:
        return configured
    return {
        "top_k": 2,
        "score_threshold": 0.0,
    }
@property
def created_by_account(self):
return db.session.get(Account, self.created_by)
@ -162,6 +171,29 @@ class Dataset(db.Model):
return tags or []
@property
def external_knowledge_info(self):
    """Describe the external knowledge source bound to this dataset.

    Returns None when the dataset's provider is not "external", or when the
    binding or the referenced external knowledge API cannot be found.
    Otherwise returns a dict with the external knowledge id and the id, name
    and endpoint of the bound API.
    """
    if self.provider != "external":
        return None
    external_knowledge_binding = (
        db.session.query(ExternalKnowledgeBindings).filter(ExternalKnowledgeBindings.dataset_id == self.id).first()
    )
    if not external_knowledge_binding:
        return None
    external_knowledge_api = (
        db.session.query(ExternalKnowledgeApis)
        .filter(ExternalKnowledgeApis.id == external_knowledge_binding.external_knowledge_api_id)
        .first()
    )
    if not external_knowledge_api:
        return None
    # `settings` is a nullable text column; settings_dict yields None for an
    # empty or malformed value instead of raising, so fall back to an empty dict.
    api_settings = external_knowledge_api.settings_dict or {}
    return {
        "external_knowledge_id": external_knowledge_binding.external_knowledge_id,
        "external_knowledge_api_id": external_knowledge_api.id,
        "external_knowledge_api_name": external_knowledge_api.name,
        "external_knowledge_api_endpoint": api_settings.get("endpoint", ""),
    }
@staticmethod
def gen_collection_name_by_id(dataset_id: str) -> str:
normalized_dataset_id = dataset_id.replace("-", "_")
@ -687,3 +719,77 @@ class DatasetPermission(db.Model):
tenant_id = db.Column(StringUUID, nullable=False)
has_permission = db.Column(db.Boolean, nullable=False, server_default=db.text("true"))
created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
class ExternalKnowledgeApis(db.Model):
    """ORM model for a tenant's external knowledge API definition (table external_knowledge_apis)."""

    __tablename__ = "external_knowledge_apis"
    __table_args__ = (
        db.PrimaryKeyConstraint("id", name="external_knowledge_apis_pkey"),
        db.Index("external_knowledge_apis_tenant_idx", "tenant_id"),
        db.Index("external_knowledge_apis_name_idx", "name"),
    )

    id = db.Column(StringUUID, nullable=False, server_default=db.text("uuid_generate_v4()"))
    name = db.Column(db.String(255), nullable=False)
    description = db.Column(db.String(255), nullable=False)
    tenant_id = db.Column(StringUUID, nullable=False)
    # JSON-encoded settings; decoded by settings_dict.
    settings = db.Column(db.Text, nullable=True)
    created_by = db.Column(StringUUID, nullable=False)
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
    updated_by = db.Column(StringUUID, nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))

    def to_dict(self):
        """Serialize the record, with decoded settings and the datasets bound to it."""
        serialized = {
            "id": self.id,
            "tenant_id": self.tenant_id,
            "name": self.name,
            "description": self.description,
            "settings": self.settings_dict,
            "dataset_bindings": self.dataset_bindings,
            "created_by": self.created_by,
            "created_at": self.created_at.isoformat(),
        }
        return serialized

    @property
    def settings_dict(self):
        """Return `settings` decoded from JSON, or None when it is empty or not valid JSON."""
        if not self.settings:
            return None
        try:
            return json.loads(self.settings)
        except JSONDecodeError:
            return None

    @property
    def dataset_bindings(self):
        """Return `{"id", "name"}` entries for every dataset bound to this API."""
        bindings = (
            db.session.query(ExternalKnowledgeBindings)
            .filter(ExternalKnowledgeBindings.external_knowledge_api_id == self.id)
            .all()
        )
        bound_dataset_ids = [binding.dataset_id for binding in bindings]
        bound_datasets = db.session.query(Dataset).filter(Dataset.id.in_(bound_dataset_ids)).all()
        return [{"id": bound.id, "name": bound.name} for bound in bound_datasets]
class ExternalKnowledgeBindings(db.Model):
    """ORM model linking a dataset to an external knowledge API (table external_knowledge_bindings).

    Each row stores the dataset id, the id of the ExternalKnowledgeApis record
    it is bound to, and the knowledge id used on the external side.
    """

    __tablename__ = "external_knowledge_bindings"
    __table_args__ = (
        db.PrimaryKeyConstraint("id", name="external_knowledge_bindings_pkey"),
        db.Index("external_knowledge_bindings_tenant_idx", "tenant_id"),
        db.Index("external_knowledge_bindings_dataset_idx", "dataset_id"),
        db.Index("external_knowledge_bindings_external_knowledge_idx", "external_knowledge_id"),
        db.Index("external_knowledge_bindings_external_knowledge_api_idx", "external_knowledge_api_id"),
    )

    id = db.Column(StringUUID, nullable=False, server_default=db.text("uuid_generate_v4()"))
    tenant_id = db.Column(StringUUID, nullable=False)
    # References ExternalKnowledgeApis.id (no database-level foreign key is declared here).
    external_knowledge_api_id = db.Column(StringUUID, nullable=False)
    dataset_id = db.Column(StringUUID, nullable=False)
    # Free-form text identifier of the knowledge base on the external side.
    external_knowledge_id = db.Column(db.Text, nullable=False)
    created_by = db.Column(StringUUID, nullable=False)
    created_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))
    updated_by = db.Column(StringUUID, nullable=True)
    updated_at = db.Column(db.DateTime, nullable=False, server_default=db.text("CURRENT_TIMESTAMP(0)"))

View File

@ -1423,10 +1423,10 @@ class DatasetRetrieverResource(db.Model):
position = db.Column(db.Integer, nullable=False)
dataset_id = db.Column(StringUUID, nullable=False)
dataset_name = db.Column(db.Text, nullable=False)
document_id = db.Column(StringUUID, nullable=False)
document_id = db.Column(StringUUID, nullable=True)
document_name = db.Column(db.Text, nullable=False)
data_source_type = db.Column(db.Text, nullable=False)
segment_id = db.Column(StringUUID, nullable=False)
data_source_type = db.Column(db.Text, nullable=True)
segment_id = db.Column(StringUUID, nullable=True)
score = db.Column(db.Float, nullable=True)
content = db.Column(db.Text, nullable=False)
hit_count = db.Column(db.Integer, nullable=True)

1524
api/poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -221,6 +221,7 @@ volcengine-python-sdk = {extras = ["ark"], version = "^1.0.98"}
oci = "^2.133.0"
tos = "^2.7.1"
nomic = "^3.1.2"
validators = "0.21.0"
[tool.poetry.group.indriect.dependencies]
kaleido = "0.2.1"
rank-bm25 = "~0.2.2"

View File

@ -0,0 +1,92 @@
import datetime
import time
import click
from sqlalchemy import func
from werkzeug.exceptions import NotFound
import app
from configs import dify_config
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from extensions.ext_database import db
from models.dataset import Dataset, DatasetQuery, Document
@app.celery.task(queue="dataset")
def clean_unused_message_task():
    """Clean the index of stale datasets and disable their documents.

    A dataset is selected when it was created before the cut-off
    (now - CLEAN_DAY_SETTING days), has no completed/enabled/non-archived
    document updated after the cut-off, and has at least one such document
    updated before it. Selected datasets that also have no DatasetQuery newer
    than the cut-off get their index cleaned and all their documents disabled.
    Failures are reported per dataset and do not stop the run.
    """
    click.echo(click.style("Start clean unused messages .", fg="green"))
    clean_days = int(dify_config.CLEAN_DAY_SETTING)
    start_at = time.perf_counter()
    thirty_days_ago = datetime.datetime.now() - datetime.timedelta(days=clean_days)

    # The subqueries do not depend on the page, so build them once.
    # Subquery for counting new documents
    document_subquery_new = (
        db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
        .filter(
            Document.indexing_status == "completed",
            Document.enabled == True,
            Document.archived == False,
            Document.updated_at > thirty_days_ago,
        )
        .group_by(Document.dataset_id)
        .subquery()
    )
    # Subquery for counting old documents
    document_subquery_old = (
        db.session.query(Document.dataset_id, func.count(Document.id).label("document_count"))
        .filter(
            Document.indexing_status == "completed",
            Document.enabled == True,
            Document.archived == False,
            Document.updated_at < thirty_days_ago,
        )
        .group_by(Document.dataset_id)
        .subquery()
    )

    # NOTE(review): cleaned datasets drop out of this result set (their documents
    # are disabled below) while `page` keeps advancing, so a single run may skip
    # some candidates — confirm that leaving them for the next run is acceptable.
    page = 1
    while True:
        try:
            # Main query with join and filter
            datasets = (
                db.session.query(Dataset)
                .outerjoin(document_subquery_new, Dataset.id == document_subquery_new.c.dataset_id)
                .outerjoin(document_subquery_old, Dataset.id == document_subquery_old.c.dataset_id)
                .filter(
                    Dataset.created_at < thirty_days_ago,
                    func.coalesce(document_subquery_new.c.document_count, 0) == 0,
                    func.coalesce(document_subquery_old.c.document_count, 0) > 0,
                )
                .order_by(Dataset.created_at.desc())
                .paginate(page=page, per_page=50)
            )
        except NotFound:
            break
        if not datasets.items:
            break
        page += 1
        for dataset in datasets.items:
            # Only existence matters, so fetch at most one row.
            recent_query = (
                db.session.query(DatasetQuery)
                .filter(DatasetQuery.created_at > thirty_days_ago, DatasetQuery.dataset_id == dataset.id)
                .first()
            )
            if recent_query is not None:
                continue
            try:
                # remove index
                index_processor = IndexProcessorFactory(dataset.doc_form).init_index_processor()
                index_processor.clean(dataset, None)

                # update document
                update_params = {Document.enabled: False}
                Document.query.filter_by(dataset_id=dataset.id).update(update_params)
                db.session.commit()
                click.echo(click.style(f"Cleaned unused dataset {dataset.id} from db success!", fg="green"))
            except Exception as e:
                # Reset the session so one failed dataset does not break the
                # queries issued for the remaining ones.
                db.session.rollback()
                click.echo(click.style(f"clean dataset index error: {e.__class__.__name__} {str(e)}", fg="red"))
    end_at = time.perf_counter()
    click.echo(click.style(f"Cleaned unused dataset from db success latency: {end_at - start_at}", fg="green"))

View File

@ -32,6 +32,7 @@ from models.dataset import (
DatasetQuery,
Document,
DocumentSegment,
ExternalKnowledgeBindings,
)
from models.model import UploadFile
from models.source import DataSourceOauthBinding
@ -39,6 +40,7 @@ from services.errors.account import NoPermissionError
from services.errors.dataset import DatasetNameDuplicateError
from services.errors.document import DocumentIndexingError
from services.errors.file import FileNotExistsError
from services.external_knowledge_service import ExternalDatasetService
from services.feature_service import FeatureModel, FeatureService
from services.tag_service import TagService
from services.vector_service import VectorService
@ -56,10 +58,8 @@ from tasks.sync_website_document_indexing_task import sync_website_document_inde
class DatasetService:
@staticmethod
def get_datasets(page, per_page, provider="vendor", tenant_id=None, user=None, search=None, tag_ids=None):
query = Dataset.query.filter(Dataset.provider == provider, Dataset.tenant_id == tenant_id).order_by(
Dataset.created_at.desc()
)
def get_datasets(page, per_page, tenant_id=None, user=None, search=None, tag_ids=None):
query = Dataset.query.filter(Dataset.tenant_id == tenant_id).order_by(Dataset.created_at.desc())
if user:
# get permitted dataset ids
@ -137,7 +137,14 @@ class DatasetService:
@staticmethod
def create_empty_dataset(
tenant_id: str, name: str, indexing_technique: Optional[str], account: Account, permission: Optional[str] = None
tenant_id: str,
name: str,
indexing_technique: Optional[str],
account: Account,
permission: Optional[str] = None,
provider: str = "vendor",
external_knowledge_api_id: Optional[str] = None,
external_knowledge_id: Optional[str] = None,
):
# check if dataset name already exists
if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first():
@ -156,12 +163,28 @@ class DatasetService:
dataset.embedding_model_provider = embedding_model.provider if embedding_model else None
dataset.embedding_model = embedding_model.model if embedding_model else None
dataset.permission = permission or DatasetPermissionEnum.ONLY_ME
dataset.provider = provider
db.session.add(dataset)
db.session.flush()
if provider == "external" and external_knowledge_api_id:
external_knowledge_api = ExternalDatasetService.get_external_knowledge_api(external_knowledge_api_id)
if not external_knowledge_api:
raise ValueError("External API template not found.")
external_knowledge_binding = ExternalKnowledgeBindings(
tenant_id=tenant_id,
dataset_id=dataset.id,
external_knowledge_api_id=external_knowledge_api_id,
external_knowledge_id=external_knowledge_id,
created_by=account.id,
)
db.session.add(external_knowledge_binding)
db.session.commit()
return dataset
@staticmethod
def get_dataset(dataset_id):
def get_dataset(dataset_id) -> Dataset:
return Dataset.query.filter_by(id=dataset_id).first()
@staticmethod
@ -202,81 +225,103 @@ class DatasetService:
@staticmethod
def update_dataset(dataset_id, data, user):
data.pop("partial_member_list", None)
filtered_data = {k: v for k, v in data.items() if v is not None or k == "description"}
dataset = DatasetService.get_dataset(dataset_id)
DatasetService.check_dataset_permission(dataset, user)
action = None
if dataset.indexing_technique != data["indexing_technique"]:
# if update indexing_technique
if data["indexing_technique"] == "economy":
action = "remove"
filtered_data["embedding_model"] = None
filtered_data["embedding_model_provider"] = None
filtered_data["collection_binding_id"] = None
elif data["indexing_technique"] == "high_quality":
action = "add"
# get embedding model setting
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
else:
if dataset.provider == "external":
dataset.retrieval_model = data.get("external_retrieval_model", None)
dataset.name = data.get("name", dataset.name)
dataset.description = data.get("description", "")
external_knowledge_id = data.get("external_knowledge_id", None)
db.session.add(dataset)
if not external_knowledge_id:
raise ValueError("External knowledge id is required.")
external_knowledge_api_id = data.get("external_knowledge_api_id", None)
if not external_knowledge_api_id:
raise ValueError("External knowledge api id is required.")
external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(dataset_id=dataset_id).first()
if (
data["embedding_model_provider"] != dataset.embedding_model_provider
or data["embedding_model"] != dataset.embedding_model
external_knowledge_binding.external_knowledge_id != external_knowledge_id
or external_knowledge_binding.external_knowledge_api_id != external_knowledge_api_id
):
action = "update"
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
external_knowledge_binding.external_knowledge_id = external_knowledge_id
external_knowledge_binding.external_knowledge_api_id = external_knowledge_api_id
db.session.add(external_knowledge_binding)
db.session.commit()
else:
data.pop("partial_member_list", None)
filtered_data = {k: v for k, v in data.items() if v is not None or k == "description"}
action = None
if dataset.indexing_technique != data["indexing_technique"]:
# if update indexing_technique
if data["indexing_technique"] == "economy":
action = "remove"
filtered_data["embedding_model"] = None
filtered_data["embedding_model_provider"] = None
filtered_data["collection_binding_id"] = None
elif data["indexing_technique"] == "high_quality":
action = "add"
# get embedding model setting
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
else:
if (
data["embedding_model_provider"] != dataset.embedding_model_provider
or data["embedding_model"] != dataset.embedding_model
):
action = "update"
try:
model_manager = ModelManager()
embedding_model = model_manager.get_model_instance(
tenant_id=current_user.current_tenant_id,
provider=data["embedding_model_provider"],
model_type=ModelType.TEXT_EMBEDDING,
model=data["embedding_model"],
)
filtered_data["embedding_model"] = embedding_model.model
filtered_data["embedding_model_provider"] = embedding_model.provider
dataset_collection_binding = DatasetCollectionBindingService.get_dataset_collection_binding(
embedding_model.provider, embedding_model.model
)
filtered_data["collection_binding_id"] = dataset_collection_binding.id
except LLMBadRequestError:
raise ValueError(
"No Embedding Model available. Please configure a valid provider "
"in the Settings -> Model Provider."
)
except ProviderTokenNotInitError as ex:
raise ValueError(ex.description)
filtered_data["updated_by"] = user.id
filtered_data["updated_at"] = datetime.datetime.now()
filtered_data["updated_by"] = user.id
filtered_data["updated_at"] = datetime.datetime.now()
# update Retrieval model
filtered_data["retrieval_model"] = data["retrieval_model"]
# update Retrieval model
filtered_data["retrieval_model"] = data["retrieval_model"]
dataset.query.filter_by(id=dataset_id).update(filtered_data)
dataset.query.filter_by(id=dataset_id).update(filtered_data)
db.session.commit()
if action:
deal_dataset_vector_index_task.delay(dataset_id, action)
db.session.commit()
if action:
deal_dataset_vector_index_task.delay(dataset_id, action)
return dataset
@staticmethod

View File

@ -0,0 +1,26 @@
from typing import Literal, Optional, Union
from pydantic import BaseModel
class AuthorizationConfig(BaseModel):
    """Credential details used when an Authorization is of type "api-key"."""

    # How the key is rendered into the header value; required, may be None.
    type: Literal[None, "basic", "bearer", "custom"]
    api_key: Optional[str] = None
    # Header name to carry the credential.
    header: Optional[str] = None
class Authorization(BaseModel):
    """Authorization mode for an external API call, with optional credential config."""

    type: Literal["no-auth", "api-key"]
    # Credential details; optional at the model level.
    config: Optional[AuthorizationConfig] = None
class ProcessStatusSetting(BaseModel):
    """HTTP method and URL pair. NOTE(review): presumably for polling a processing status — confirm against callers."""

    request_method: str
    url: str
class ExternalKnowledgeApiSetting(BaseModel):
    """Description of one HTTP request to an external knowledge API."""

    url: str
    # Name of the HTTP method to use for the request.
    request_method: str
    headers: Optional[dict] = None
    # Request payload.
    params: Optional[dict] = None

View File

@ -0,0 +1,274 @@
import json
from copy import deepcopy
from datetime import datetime, timezone
from typing import Any, Optional, Union
import httpx
import validators
# from tasks.external_document_indexing_task import external_document_indexing_task
from core.helper import ssrf_proxy
from extensions.ext_database import db
from models.dataset import (
Dataset,
ExternalKnowledgeApis,
ExternalKnowledgeBindings,
)
from services.entities.external_knowledge_entities.external_knowledge_entities import (
Authorization,
ExternalKnowledgeApiSetting,
)
from services.errors.dataset import DatasetNameDuplicateError
class ExternalDatasetService:
@staticmethod
def get_external_knowledge_apis(page, per_page, tenant_id, search=None) -> tuple[list[ExternalKnowledgeApis], int]:
    """Return one page of a tenant's external knowledge APIs (newest first) and the total match count.

    When `search` is given, only APIs whose name contains it (case-insensitive) are returned.
    """
    criteria = [ExternalKnowledgeApis.tenant_id == tenant_id]
    if search:
        criteria.append(ExternalKnowledgeApis.name.ilike(f"%{search}%"))
    ordered = ExternalKnowledgeApis.query.filter(*criteria).order_by(ExternalKnowledgeApis.created_at.desc())
    pagination = ordered.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False)
    return pagination.items, pagination.total
@classmethod
def validate_api_list(cls, api_settings: dict):
    """Validate that the API settings carry a non-empty endpoint and api_key.

    Raises:
        ValueError: if `api_settings` is empty, or if "endpoint" or "api_key"
            is missing or empty.
    """
    if not api_settings:
        raise ValueError("api list is empty")
    # A missing key and an empty value are both rejected.
    if not api_settings.get("endpoint"):
        raise ValueError("endpoint is required")
    if not api_settings.get("api_key"):
        raise ValueError("api_key is required")
@staticmethod
def create_external_knowledge_api(tenant_id: str, user_id: str, args: dict) -> ExternalKnowledgeApis:
    """Validate the supplied settings, then persist and return a new external knowledge API record."""
    api_settings = args.get("settings")
    ExternalDatasetService.check_endpoint_and_api_key(api_settings)
    record = ExternalKnowledgeApis(
        tenant_id=tenant_id,
        created_by=user_id,
        updated_by=user_id,
        name=args.get("name"),
        description=args.get("description", ""),
        settings=json.dumps(api_settings, ensure_ascii=False),
    )
    db.session.add(record)
    db.session.commit()
    return record
@staticmethod
def check_endpoint_and_api_key(settings: dict):
    """Validate external API settings and probe `<endpoint>/retrieval` with the api_key.

    Raises:
        ValueError: if settings are missing, "endpoint" or "api_key" is missing
            or empty, the endpoint is not a valid URL, the request fails, or
            the endpoint answers 502, 404 or 403.
    """
    # `settings` may be None when the caller's payload omitted it.
    if not settings or not settings.get("endpoint"):
        raise ValueError("endpoint is required")
    if not settings.get("api_key"):
        raise ValueError("api_key is required")

    endpoint = f"{settings['endpoint']}/retrieval"
    api_key = settings["api_key"]
    if not validators.url(endpoint):
        raise ValueError(f"invalid endpoint: {endpoint}")
    try:
        # The endpoint is user-supplied: send the probe through the same SSRF
        # proxy helper that the retrieval request uses.
        response = ssrf_proxy.post(endpoint, headers={"Authorization": f"Bearer {api_key}"})
    except Exception as e:
        raise ValueError(f"failed to connect to the endpoint: {endpoint}") from e
    if response.status_code == 502:
        raise ValueError(f"Bad Gateway: failed to connect to the endpoint: {endpoint}")
    if response.status_code == 404:
        raise ValueError(f"Not Found: failed to connect to the endpoint: {endpoint}")
    if response.status_code == 403:
        # Do not echo the secret back in the error message.
        raise ValueError("Forbidden: Authorization failed with the provided api_key")
@staticmethod
def get_external_knowledge_api(external_knowledge_api_id: str) -> ExternalKnowledgeApis:
    """Return the first external knowledge API with the given id, as yielded by `.first()`."""
    matching = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id)
    return matching.first()
@staticmethod
def update_external_knowledge_api(tenant_id, user_id, external_knowledge_api_id, args) -> ExternalKnowledgeApis:
    """Overwrite name, description and settings of a tenant's external knowledge API.

    Raises:
        ValueError: if no API with that id exists for the tenant.
    """
    record = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first()
    if record is None:
        raise ValueError("api template not found")

    record.name = args.get("name")
    record.description = args.get("description", "")
    record.settings = json.dumps(args.get("settings"), ensure_ascii=False)
    record.updated_by = user_id
    # Stored as a naive UTC timestamp.
    record.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
    db.session.commit()
    return record
@staticmethod
def delete_external_knowledge_api(tenant_id: str, external_knowledge_api_id: str):
    """Delete a tenant's external knowledge API.

    Raises:
        ValueError: if no API with that id exists for the tenant.
    """
    record = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first()
    if record is None:
        raise ValueError("api template not found")
    db.session.delete(record)
    db.session.commit()
@staticmethod
def external_knowledge_api_use_check(external_knowledge_api_id: str) -> tuple[bool, int]:
    """Return `(in_use, binding_count)` for the given external knowledge API."""
    binding_count = ExternalKnowledgeBindings.query.filter_by(
        external_knowledge_api_id=external_knowledge_api_id
    ).count()
    in_use = binding_count > 0
    return in_use, (binding_count if in_use else 0)
@staticmethod
def get_external_knowledge_binding_with_dataset_id(tenant_id: str, dataset_id: str) -> ExternalKnowledgeBindings:
    """Return the external knowledge binding of a tenant's dataset.

    Raises:
        ValueError: if the dataset has no binding for that tenant.
    """
    binding = ExternalKnowledgeBindings.query.filter_by(dataset_id=dataset_id, tenant_id=tenant_id).first()
    if binding:
        return binding
    raise ValueError("external knowledge binding not found")
@staticmethod
def document_create_args_validate(tenant_id: str, external_knowledge_api_id: str, process_parameter: dict):
    """Check that every required document-process parameter is present in `process_parameter`.

    Raises:
        ValueError: if the API is not found for the tenant, or a parameter
            marked as required has no truthy value in `process_parameter`.
    """
    api_record = ExternalKnowledgeApis.query.filter_by(id=external_knowledge_api_id, tenant_id=tenant_id).first()
    if api_record is None:
        raise ValueError("api template not found")

    # NOTE(review): this iteration assumes the decoded settings are a list of
    # dicts, while other methods in this service read settings as a single dict
    # with "endpoint"/"api_key" — confirm the expected shape.
    for setting in json.loads(api_record.settings):
        custom_parameters = setting.get("document_process_setting")
        if not custom_parameters:
            continue
        for parameter in custom_parameters:
            if not parameter.get("required", False):
                continue
            if not process_parameter.get(parameter.get("name")):
                raise ValueError(f'{parameter.get("name")} is required')
@staticmethod
def process_external_api(
    settings: ExternalKnowledgeApiSetting, files: Union[None, dict[str, Any]]
) -> httpx.Response:
    """
    do http request depending on api bundle

    The ssrf_proxy function named by `settings.request_method` is called with
    the URL, headers, JSON-encoded params as body, and redirects followed.
    """
    send = getattr(ssrf_proxy, settings.request_method)
    payload = json.dumps(settings.params)
    return send(
        data=payload,
        files=files,
        url=settings.url,
        headers=settings.headers,
        follow_redirects=True,
    )
@staticmethod
def assembling_headers(authorization: Authorization, headers: Optional[dict] = None) -> dict[str, Any]:
    """Return a copy of `headers` with the credential header implied by `authorization` added.

    Only the "api-key" authorization type adds a header; its name defaults to
    "Authorization". Neither argument is modified.

    Raises:
        ValueError: for "api-key" authorization without a config or without an api_key.
    """
    assembled = deepcopy(headers) if headers else {}
    if authorization.type != "api-key":
        return assembled

    config = authorization.config
    if config is None:
        raise ValueError("authorization config is required")
    if config.api_key is None:
        raise ValueError("api_key is required")

    header_name = config.header if config.header else "Authorization"
    scheme = {"bearer": "Bearer", "basic": "Basic"}.get(config.type)
    if scheme is not None:
        assembled[header_name] = f"{scheme} {config.api_key}"
    elif config.type == "custom":
        # Custom credentials are sent verbatim.
        assembled[header_name] = config.api_key
    return assembled
@staticmethod
def get_external_knowledge_api_settings(settings: dict) -> ExternalKnowledgeApiSetting:
    """Build an ExternalKnowledgeApiSetting model from a plain settings dict."""
    parsed_setting = ExternalKnowledgeApiSetting.parse_obj(settings)
    return parsed_setting
@staticmethod
def create_external_dataset(tenant_id: str, user_id: str, args: dict) -> Dataset:
    """Create a dataset with provider "external" and bind it to an external knowledge API.

    Raises:
        DatasetNameDuplicateError: if the tenant already has a dataset with that name.
        ValueError: if the referenced external knowledge API does not exist for
            the tenant, or `external_knowledge_id` is missing.
    """
    # check if dataset name already exists
    if Dataset.query.filter_by(name=args.get("name"), tenant_id=tenant_id).first():
        raise DatasetNameDuplicateError(f"Dataset with name {args.get('name')} already exists.")
    external_knowledge_api = ExternalKnowledgeApis.query.filter_by(
        id=args.get("external_knowledge_api_id"), tenant_id=tenant_id
    ).first()

    if external_knowledge_api is None:
        raise ValueError("api template not found")
    # The binding column is NOT NULL: reject a missing id here instead of
    # failing with a database error after the dataset row was already flushed.
    if not args.get("external_knowledge_id"):
        raise ValueError("external_knowledge_id is required")

    dataset = Dataset(
        tenant_id=tenant_id,
        name=args.get("name"),
        description=args.get("description", ""),
        provider="external",
        retrieval_model=args.get("external_retrieval_model"),
        created_by=user_id,
    )

    db.session.add(dataset)
    # Flush so that dataset.id is available for the binding row.
    db.session.flush()

    external_knowledge_binding = ExternalKnowledgeBindings(
        tenant_id=tenant_id,
        dataset_id=dataset.id,
        external_knowledge_api_id=args.get("external_knowledge_api_id"),
        external_knowledge_id=args.get("external_knowledge_id"),
        created_by=user_id,
    )
    db.session.add(external_knowledge_binding)

    db.session.commit()

    return dataset
@staticmethod
def fetch_external_knowledge_retrieval(
    tenant_id: str, dataset_id: str, query: str, external_retrieval_parameters: dict
) -> list:
    """
    Query the external knowledge API bound to a dataset.

    :param tenant_id: tenant used to look up the dataset's binding
    :param dataset_id: dataset whose external knowledge binding is used
    :param query: query text sent to the external API
    :param external_retrieval_parameters: reads ``top_k``, ``score_threshold`` and
        ``score_threshold_enabled``
    :return: the ``records`` list of a 200 response, otherwise an empty list
    :raises ValueError: if the binding or the API template cannot be found
    """
    binding = ExternalKnowledgeBindings.query.filter_by(dataset_id=dataset_id, tenant_id=tenant_id).first()
    if not binding:
        raise ValueError("external knowledge binding not found")

    api_template = ExternalKnowledgeApis.query.filter_by(id=binding.external_knowledge_api_id).first()
    if not api_template:
        raise ValueError("external api template not found")

    settings = json.loads(api_template.settings)

    request_headers = {"Content-Type": "application/json"}
    api_key = settings.get("api_key")
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    # The configured threshold only applies when explicitly enabled; otherwise send 0.0.
    threshold = 0.0
    if external_retrieval_parameters.get("score_threshold_enabled"):
        threshold = external_retrieval_parameters.get("score_threshold", 0.0)

    payload = {
        "retrieval_setting": {
            "top_k": external_retrieval_parameters.get("top_k"),
            "score_threshold": threshold,
        },
        "query": query,
        "knowledge_id": binding.external_knowledge_id,
    }
    api_setting = ExternalKnowledgeApiSetting(
        url=f"{settings.get('endpoint')}/retrieval",
        request_method="post",
        headers=request_headers,
        params=payload,
    )

    response = ExternalDatasetService.process_external_api(api_setting, None)
    if response.status_code != 200:
        # Any non-200 answer is reported to the caller as "no records".
        return []
    return response.json().get("records", [])

View File

@ -19,7 +19,15 @@ default_retrieval_model = {
class HitTestingService:
@classmethod
def retrieve(cls, dataset: Dataset, query: str, account: Account, retrieval_model: dict, limit: int = 10) -> dict:
def retrieve(
cls,
dataset: Dataset,
query: str,
account: Account,
retrieval_model: dict,
external_retrieval_model: dict,
limit: int = 10,
) -> dict:
if dataset.available_document_count == 0 or dataset.available_segment_count == 0:
return {
"query": {
@ -62,10 +70,44 @@ class HitTestingService:
return cls.compact_retrieve_response(dataset, query, all_documents)
@classmethod
def external_retrieve(
    cls,
    dataset: Dataset,
    query: str,
    account: Account,
    external_retrieval_model: dict,
) -> dict:
    """
    Run a hit-testing query against an external dataset and record the query.

    :param dataset: dataset to test; anything whose provider is not ``"external"`` yields no records
    :param query: query text entered by the user
    :param account: account stored as creator of the ``DatasetQuery`` row
    :param external_retrieval_model: retrieval settings forwarded to ``RetrievalService.external_retrieve``
    :return: dict with the query content and the retrieved records
    """
    is_external = dataset.provider == "external"
    if not is_external:
        return {
            "query": {"content": query},
            "records": [],
        }

    started_at = time.perf_counter()
    # The escaped query is sent to retrieval; the raw query is what gets stored below.
    escaped_query = cls.escape_query_for_search(query)
    all_documents = RetrievalService.external_retrieve(
        dataset_id=dataset.id,
        query=escaped_query,
        external_retrieval_model=external_retrieval_model,
    )
    elapsed = time.perf_counter() - started_at
    logging.debug(f"External knowledge hit testing retrieve in {elapsed:0.4f} seconds")

    # Record the hit-testing query for this dataset.
    query_record = DatasetQuery(
        dataset_id=dataset.id,
        content=query,
        source="hit_testing",
        created_by_role="account",
        created_by=account.id,
    )
    db.session.add(query_record)
    db.session.commit()

    return cls.compact_external_retrieve_response(dataset, query, all_documents)
@classmethod
def compact_retrieve_response(cls, dataset: Dataset, query: str, documents: list[Document]):
i = 0
records = []
for document in documents:
index_node_id = document.metadata["doc_id"]
@ -81,7 +123,6 @@ class HitTestingService:
)
if not segment:
i += 1
continue
record = {
@ -91,8 +132,6 @@ class HitTestingService:
records.append(record)
i += 1
return {
"query": {
"content": query,
@ -100,6 +139,25 @@ class HitTestingService:
"records": records,
}
@classmethod
def compact_external_retrieve_response(cls, dataset: Dataset, query: str, documents: list):
    """
    Shape external retrieval results into the hit-testing response format.

    :param dataset: dataset the documents came from; records are only emitted when its
        provider is ``"external"``
    :param query: query text echoed back in the response
    :param documents: dict-like results; ``content``, ``title``, ``score`` and ``metadata``
        are copied, with missing keys becoming ``None``
    :return: dict with the query content and the list of compacted records
    """
    record_fields = ("content", "title", "score", "metadata")
    if dataset.provider == "external":
        records = [{field: document.get(field, None) for field in record_fields} for document in documents]
    else:
        records = []
    return {
        "query": {
            "content": query,
        },
        "records": records,
    }
@classmethod
def hit_testing_args_check(cls, args):
query = args["query"]

View File

@ -0,0 +1,93 @@
import json
import logging
import time
import click
from celery import shared_task
from core.indexing_runner import DocumentIsPausedException
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.dataset import Dataset, ExternalKnowledgeApis
from models.model import UploadFile
from services.external_knowledge_service import ExternalDatasetService
@shared_task(queue="dataset")
def external_document_indexing_task(
    dataset_id: str, external_knowledge_api_id: str, data_source: dict, process_parameter: dict
):
    """
    Send a dataset's uploaded files to its external knowledge API and store the returned job id.

    :param dataset_id: id of the dataset being processed
    :param external_knowledge_api_id: id of the external API template, looked up within the
        dataset's tenant
    :param data_source: data source description; only ``type == "upload_file"`` is handled, with
        file ids read from ``data_source["info_list"]["file_info_list"]["file_ids"]``
    :param process_parameter: passed through to ``ExternalDatasetService.process_external_api``

    Usage: external_document_indexing_task.delay(
        dataset_id, external_knowledge_api_id, data_source, process_parameter
    )
    """
    start_at = time.perf_counter()

    dataset = db.session.query(Dataset).filter(Dataset.id == dataset_id).first()
    if not dataset:
        logging.info(
            click.style("Processed external dataset: {} failed, dataset not exit.".format(dataset_id), fg="red")
        )
        return

    # get external api template
    external_knowledge_api = (
        db.session.query(ExternalKnowledgeApis)
        .filter(
            ExternalKnowledgeApis.id == external_knowledge_api_id, ExternalKnowledgeApis.tenant_id == dataset.tenant_id
        )
        .first()
    )
    if not external_knowledge_api:
        logging.info(
            click.style(
                "Processed external dataset: {} failed, api template: {} not exit.".format(
                    dataset_id, external_knowledge_api_id
                ),
                fg="red",
            )
        )
        return

    # Collect the uploaded files as {file_id: (name, content, mime_type)}; ids that do not
    # resolve to an UploadFile of this tenant are skipped.
    files = {}
    if data_source["type"] == "upload_file":
        upload_file_list = data_source["info_list"]["file_info_list"]["file_ids"]
        for file_id in upload_file_list:
            file = (
                db.session.query(UploadFile)
                .filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id)
                .first()
            )
            if file:
                files[file.id] = (file.name, storage.load_once(file.key), file.mime_type)

    try:
        settings = ExternalDatasetService.get_external_knowledge_api_settings(
            json.loads(external_knowledge_api.settings)
        )
        # assemble headers
        headers = ExternalDatasetService.assembling_headers(settings.authorization, settings.headers)

        # do http request
        response = ExternalDatasetService.process_external_api(settings, headers, process_parameter, files)
        job_id = response.json().get("job_id")
        if job_id:
            # save job_id to dataset
            dataset.job_id = job_id
            db.session.commit()

        end_at = time.perf_counter()
        logging.info(
            click.style(
                "Processed external dataset: {} successful, latency: {}".format(dataset.id, end_at - start_at),
                fg="green",
            )
        )
    except DocumentIsPausedException as ex:
        logging.info(click.style(str(ex), fg="yellow"))
    except Exception:
        # Still best-effort (the task does not re-raise), but record the traceback so a failed
        # external call is visible instead of being silently discarded.
        logging.exception("Processed external dataset: {} failed".format(dataset_id))

View File

@ -1,6 +1,6 @@
'use client'
import type { FC, SVGProps } from 'react'
import React, { useEffect } from 'react'
import React, { useEffect, useMemo } from 'react'
import { usePathname } from 'next/navigation'
import useSWR from 'swr'
import { useTranslation } from 'react-i18next'
@ -203,12 +203,23 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
datasetId,
}, apiParams => fetchDatasetRelatedApps(apiParams.datasetId))
const navigation = [
{ name: t('common.datasetMenus.documents'), href: `/datasets/${datasetId}/documents`, icon: DocumentTextIcon, selectedIcon: DocumentTextSolidIcon },
{ name: t('common.datasetMenus.hitTesting'), href: `/datasets/${datasetId}/hitTesting`, icon: TargetIcon, selectedIcon: TargetSolidIcon },
// { name: 'api & webhook', href: `/datasets/${datasetId}/api`, icon: CommandLineIcon, selectedIcon: CommandLineSolidIcon },
{ name: t('common.datasetMenus.settings'), href: `/datasets/${datasetId}/settings`, icon: Cog8ToothIcon, selectedIcon: Cog8ToothSolidIcon },
]
// Sidebar entries of the dataset detail page. The "documents" entry is placed first,
// and is left out entirely when the dataset's provider is 'external'.
const navigation = useMemo(() => {
  const documentsEntry = {
    name: t('common.datasetMenus.documents'),
    href: `/datasets/${datasetId}/documents`,
    icon: DocumentTextIcon,
    selectedIcon: DocumentTextSolidIcon,
  }
  const sharedEntries = [
    { name: t('common.datasetMenus.hitTesting'), href: `/datasets/${datasetId}/hitTesting`, icon: TargetIcon, selectedIcon: TargetSolidIcon },
    // { name: 'api & webhook', href: `/datasets/${datasetId}/api`, icon: CommandLineIcon, selectedIcon: CommandLineSolidIcon },
    { name: t('common.datasetMenus.settings'), href: `/datasets/${datasetId}/settings`, icon: Cog8ToothIcon, selectedIcon: Cog8ToothSolidIcon },
  ]
  return datasetRes?.provider !== 'external'
    ? [documentsEntry, ...sharedEntries]
    : sharedEntries
}, [datasetRes?.provider, datasetId, t])
useEffect(() => {
if (datasetRes)
@ -233,6 +244,7 @@ const DatasetDetailLayout: FC<IAppDetailLayoutProps> = (props) => {
icon={datasetRes?.icon || 'https://static.dify.ai/images/dataset-default-icon.png'}
icon_background={datasetRes?.icon_background || '#F5F5F5'}
desc={datasetRes?.description || '--'}
isExternal={datasetRes?.provider === 'external'}
navigation={navigation}
extraInfo={!isCurrentWorkspaceDatasetOperator ? mode => <ExtraInfo isMobile={mode === 'collapse'} relatedApps={relatedApps} /> : undefined}
iconType={datasetRes?.data_source_type === DataSourceType.NOTION ? 'notion' : 'dataset'}

View File

@ -8,6 +8,7 @@ import { useDebounceFn } from 'ahooks'
import useSWR from 'swr'
// Components
import ExternalAPIPanel from '../../components/datasets/external-api/external-api-panel'
import Datasets from './Datasets'
import DatasetFooter from './DatasetFooter'
import ApiServer from './ApiServer'
@ -16,6 +17,8 @@ import TabSliderNew from '@/app/components/base/tab-slider-new'
import SearchInput from '@/app/components/base/search-input'
import TagManagementModal from '@/app/components/base/tag-management'
import TagFilter from '@/app/components/base/tag-management/filter'
import Button from '@/app/components/base/button'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
// Services
import { fetchDatasetApiBaseUrl } from '@/service/datasets'
@ -24,12 +27,14 @@ import { fetchDatasetApiBaseUrl } from '@/service/datasets'
import { useTabSearchParams } from '@/hooks/use-tab-searchparams'
import { useStore as useTagStore } from '@/app/components/base/tag-management/store'
import { useAppContext } from '@/context/app-context'
import { useExternalApiPanel } from '@/context/external-api-panel-context'
const Container = () => {
const { t } = useTranslation()
const router = useRouter()
const { currentWorkspace } = useAppContext()
const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
const { showExternalApiPanel, setShowExternalApiPanel } = useExternalApiPanel()
const options = useMemo(() => {
return [
@ -66,7 +71,7 @@ const Container = () => {
useEffect(() => {
if (currentWorkspace.role === 'normal')
return router.replace('/apps')
}, [currentWorkspace])
}, [currentWorkspace, router])
return (
<div ref={containerRef} className='grow relative flex flex-col bg-gray-100 overflow-y-auto'>
@ -80,11 +85,18 @@ const Container = () => {
<div className='flex items-center gap-2'>
<TagFilter type='knowledge' value={tagFilterValue} onChange={handleTagsChange} />
<SearchInput className='w-[200px]' value={keywords} onChange={handleKeywordsChange} />
<div className="w-[1px] h-4 bg-divider-regular" />
<Button
className='gap-0.5 shadows-shadow-xs'
onClick={() => setShowExternalApiPanel(true)}
>
<ApiConnectionMod className='w-4 h-4 text-components-button-secondary-text' />
<div className='flex px-0.5 justify-center items-center gap-1 text-components-button-secondary-text system-sm-medium'>{t('dataset.externalAPIPanelTitle')}</div>
</Button>
</div>
)}
{activeTab === 'api' && data && <ApiServer apiBaseUrl={data.api_base_url || ''} />}
</div>
{activeTab === 'dataset' && (
<>
<Datasets containerRef={containerRef} tags={tagIDs} keywords={searchKeywords} />
@ -94,10 +106,10 @@ const Container = () => {
)}
</>
)}
{activeTab === 'api' && data && <Doc apiBaseUrl={data.api_base_url || ''} />}
</div>
{showExternalApiPanel && <ExternalAPIPanel onClose={() => setShowExternalApiPanel(false)} />}
</div>
)
}

View File

@ -18,6 +18,7 @@ import Divider from '@/app/components/base/divider'
import RenameDatasetModal from '@/app/components/datasets/rename-modal'
import type { Tag } from '@/app/components/base/tag-management/constant'
import TagSelector from '@/app/components/base/tag-management/selector'
import CornerLabel from '@/app/components/base/corner-label'
import { useAppContext } from '@/context/app-context'
export type DatasetCardProps = {
@ -32,6 +33,7 @@ const DatasetCard = ({
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const { push } = useRouter()
const EXTERNAL_PROVIDER = 'external' as const
const { isCurrentWorkspaceDatasetOperator } = useAppContext()
const [tags, setTags] = useState<Tag[]>(dataset.tags)
@ -39,6 +41,7 @@ const DatasetCard = ({
const [showRenameModal, setShowRenameModal] = useState(false)
const [showConfirmDelete, setShowConfirmDelete] = useState(false)
const [confirmMessage, setConfirmMessage] = useState<string>('')
const isExternalProvider = (provider: string): boolean => provider === EXTERNAL_PROVIDER
const detectIsUsedByApp = useCallback(async () => {
try {
const { is_using: isUsedByApp } = await checkIsUsedInApp(dataset.id)
@ -108,13 +111,16 @@ const DatasetCard = ({
return (
<>
<div
className='group col-span-1 bg-white border-2 border-solid border-transparent rounded-xl shadow-sm min-h-[160px] flex flex-col transition-all duration-200 ease-in-out cursor-pointer hover:shadow-lg'
className='group relative col-span-1 bg-white border-[0.5px] border-solid border-transparent rounded-xl shadow-sm min-h-[160px] flex flex-col transition-all duration-200 ease-in-out cursor-pointer hover:shadow-lg'
data-disable-nprogress={true}
onClick={(e) => {
e.preventDefault()
push(`/datasets/${dataset.id}/documents`)
isExternalProvider(dataset.provider)
? push(`/datasets/${dataset.id}/hitTesting`)
: push(`/datasets/${dataset.id}/documents`)
}}
>
{isExternalProvider(dataset.provider) && <CornerLabel label='External' className='absolute right-0' labelClassName='rounded-tr-xl' />}
<div className='flex pt-[14px] px-[14px] pb-3 h-[66px] items-center gap-3 grow-0 shrink-0'>
<div className={cn(
'shrink-0 flex items-center justify-center p-2.5 bg-[#F5F8FF] rounded-md border-[0.5px] border-[#E0EAFF]',
@ -136,13 +142,20 @@ const DatasetCard = ({
<div className='flex items-center mt-[1px] text-xs leading-[18px] text-gray-500'>
<div
className={cn('truncate', (!dataset.embedding_available || !dataset.document_count) && 'opacity-50')}
title={`${dataset.document_count}${t('dataset.documentCount')} · ${Math.round(dataset.word_count / 1000)}${t('dataset.wordCount')} · ${dataset.app_count}${t('dataset.appCount')}`}
title={dataset.provider === 'external' ? `${dataset.app_count}${t('dataset.appCount')}` : `${dataset.document_count}${t('dataset.documentCount')} · ${Math.round(dataset.word_count / 1000)}${t('dataset.wordCount')} · ${dataset.app_count}${t('dataset.appCount')}`}
>
<span>{dataset.document_count}{t('dataset.documentCount')}</span>
<span className='shrink-0 mx-0.5 w-1 text-gray-400'>·</span>
<span>{Math.round(dataset.word_count / 1000)}{t('dataset.wordCount')}</span>
<span className='shrink-0 mx-0.5 w-1 text-gray-400'>·</span>
<span>{dataset.app_count}{t('dataset.appCount')}</span>
{dataset.provider === 'external'
? <>
<span>{dataset.app_count}{t('dataset.appCount')}</span>
</>
: <>
<span>{dataset.document_count}{t('dataset.documentCount')}</span>
<span className='shrink-0 mx-0.5 w-1 text-gray-400'>·</span>
<span>{Math.round(dataset.word_count / 1000)}{t('dataset.wordCount')}</span>
<span className='shrink-0 mx-0.5 w-1 text-gray-400'>·</span>
<span>{dataset.app_count}{t('dataset.appCount')}</span>
</>
}
</div>
</div>
</div>

View File

@ -4,21 +4,32 @@ import { forwardRef } from 'react'
import { useTranslation } from 'react-i18next'
import {
RiAddLine,
RiArrowRightLine,
} from '@remixicon/react'
const CreateAppCard = forwardRef<HTMLAnchorElement>((_, ref) => {
const { t } = useTranslation()
return (
<a ref={ref} className='group flex flex-col col-span-1 bg-gray-200 border-[0.5px] border-black/5 rounded-xl min-h-[160px] transition-all duration-200 ease-in-out cursor-pointer hover:bg-white hover:shadow-lg' href='/datasets/create'>
<div className='shrink-0 flex items-center p-4 pb-3'>
<div className='w-10 h-10 flex items-center justify-center border border-gray-200 bg-gray-100 rounded-lg'>
<RiAddLine className='w-4 h-4 text-gray-500'/>
<div className='flex flex-col bg-background-default-dimm border-[0.5px] border-components-panel-border rounded-xl
min-h-[160px] transition-all duration-200 ease-in-out'
>
<a ref={ref} className='group flex flex-grow items-start p-4 cursor-pointer' href='/datasets/create'>
<div className='flex items-center gap-3'>
<div className='w-10 h-10 p-2 flex items-center justify-center border border-dashed border-divider-regular rounded-lg
bg-background-default-lighter group-hover:border-solid group-hover:border-effects-highlight group-hover:bg-background-default-dodge'
>
<RiAddLine className='w-4 h-4 text-text-tertiary group-hover:text-text-accent'/>
</div>
<div className='system-md-semibold text-text-secondary group-hover:text-text-accent'>{t('dataset.createDataset')}</div>
</div>
<div className='ml-3 text-sm font-semibold leading-5 text-gray-800 group-hover:text-primary-600'>{t('dataset.createDataset')}</div>
</div>
<div className='mb-1 px-4 text-xs leading-normal text-gray-500 line-clamp-4'>{t('dataset.createDatasetIntro')}</div>
</a>
</a>
<div className='p-4 pt-0 text-text-tertiary system-xs-regular'>{t('dataset.createDatasetIntro')}</div>
<a className='group flex p-4 items-center gap-1 border-t-[0.5px] border-divider-subtle rounded-b-xl cursor-pointer' href='/datasets/connect'>
<div className='system-xs-medium text-text-tertiary group-hover:text-text-accent'>{t('dataset.connectDataset')}</div>
<RiArrowRightLine className='w-3.5 h-3.5 text-text-tertiary group-hover:text-text-accent' />
</a>
</div>
)
})

View File

@ -0,0 +1,8 @@
import React from 'react'
import ExternalKnowledgeBaseConnector from '@/app/components/datasets/external-knowledge-base/connector'
/** Page component that renders the external knowledge base connector. */
const ExternalKnowledgeBaseCreation = () => <ExternalKnowledgeBaseConnector />
export default ExternalKnowledgeBaseCreation

View File

@ -0,0 +1,14 @@
'use client'
import { ExternalApiPanelProvider } from '@/context/external-api-panel-context'
import { ExternalKnowledgeApiProvider } from '@/context/external-knowledge-api-context'
type DatasetsLayoutProps = {
  children: React.ReactNode
}

/**
 * Layout for the datasets routes: wraps the children in the external knowledge API
 * provider (outer) and the external API panel provider (inner).
 */
export default function DatasetsLayout(props: DatasetsLayoutProps) {
  const { children } = props

  return (
    <ExternalKnowledgeApiProvider>
      <ExternalApiPanelProvider>{children}</ExternalApiPanelProvider>
    </ExternalKnowledgeApiProvider>
  )
}

View File

@ -1,9 +1,7 @@
import Container from './Container'
const AppList = async () => {
return (
<Container />
)
return <Container />
}
export const metadata = {

View File

@ -0,0 +1,11 @@
import { create } from 'zustand'
/** State held by the dataset store. */
type DatasetStore = {
  // Visibility flag for the external API panel.
  showExternalApiPanel: boolean
  // Setter for `showExternalApiPanel`.
  setShowExternalApiPanel: (show: boolean) => void
}

/** Zustand store holding the external API panel flag; it starts as `false`. */
export const useDatasetStore = create<DatasetStore>((set) => {
  const setShowExternalApiPanel = (show: boolean) => set({ showExternalApiPanel: show })

  return {
    showExternalApiPanel: false,
    setShowExternalApiPanel,
  }
})

View File

@ -1,4 +1,5 @@
import React from 'react'
import { useTranslation } from 'react-i18next'
import AppIcon from '../base/app-icon'
import Tooltip from '@/app/components/base/tooltip'
@ -6,6 +7,7 @@ export type IAppBasicProps = {
iconType?: 'app' | 'api' | 'dataset' | 'webapp' | 'notion'
icon?: string
icon_background?: string | null
isExternal?: boolean
name: string
type: string | React.ReactNode
hoverTip?: string
@ -52,7 +54,9 @@ const ICON_MAP = {
notion: <AppIcon innerIcon={NotionSvg} className='!border-[0.5px] !border-indigo-100 !bg-white' />,
}
export default function AppBasic({ icon, icon_background, name, type, hoverTip, textStyle, mode = 'expand', iconType = 'app' }: IAppBasicProps) {
export default function AppBasic({ icon, icon_background, name, isExternal, type, hoverTip, textStyle, mode = 'expand', iconType = 'app' }: IAppBasicProps) {
const { t } = useTranslation()
return (
<div className="flex items-start p-1">
{icon && icon_background && iconType === 'app' && (
@ -83,6 +87,7 @@ export default function AppBasic({ icon, icon_background, name, type, hoverTip,
}
</div>
<div className={`text-xs font-normal text-gray-500 group-hover:text-gray-700 break-all ${textStyle?.extra ?? ''}`}>{type}</div>
<div className='text-text-tertiary system-2xs-medium-uppercase'>{isExternal ? t('dataset.externalTag') : ''}</div>
</div>}
</div>
)

View File

@ -15,6 +15,7 @@ export type IAppDetailNavProps = {
iconType?: 'app' | 'dataset' | 'notion'
title: string
desc: string
isExternal?: boolean
icon: string
icon_background: string
navigation: Array<{
@ -26,7 +27,7 @@ export type IAppDetailNavProps = {
extraInfo?: (modeState: string) => React.ReactNode
}
const AppDetailNav = ({ title, desc, icon, icon_background, navigation, extraInfo, iconType = 'app' }: IAppDetailNavProps) => {
const AppDetailNav = ({ title, desc, isExternal, icon, icon_background, navigation, extraInfo, iconType = 'app' }: IAppDetailNavProps) => {
const { appSidebarExpand, setAppSiderbarExpand } = useAppStore(useShallow(state => ({
appSidebarExpand: state.appSidebarExpand,
setAppSiderbarExpand: state.setAppSiderbarExpand,
@ -70,6 +71,7 @@ const AppDetailNav = ({ title, desc, icon, icon_background, navigation, extraInf
icon_background={icon_background}
name={title}
type={desc}
isExternal={isExternal}
/>
)}
</div>

View File

@ -5,6 +5,7 @@ import {
RiDeleteBinLine,
RiEditLine,
} from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import SettingsModal from '../settings-modal'
import type { DataSet } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
@ -33,6 +34,7 @@ const Item: FC<ItemProps> = ({
const isMobile = media === MediaType.mobile
const [showSettingsModal, setShowSettingsModal] = useState(false)
const { formatIndexingTechniqueAndMethod } = useKnowledge()
const { t } = useTranslation()
const handleSave = (newDataset: DataSet) => {
onSave(newDataset)
@ -65,9 +67,11 @@ const Item: FC<ItemProps> = ({
<div className='grow'>
<div className='flex items-center h-[18px]'>
<div className='grow text-[13px] font-medium text-gray-800 truncate' title={config.name}>{config.name}</div>
<Badge
text={formatIndexingTechniqueAndMethod(config.indexing_technique, config.retrieval_model_dict?.search_method)}
/>
{config.provider === 'external'
? <Badge text={t('dataset.externalTag')}></Badge>
: <Badge
text={formatIndexingTechniqueAndMethod(config.indexing_technique, config.retrieval_model_dict?.search_method)}
/>}
</div>
</div>
<div className='hidden rounded-lg group-hover:flex items-center justify-end absolute right-0 top-0 bottom-0 pr-2 w-[124px] bg-gradient-to-r from-white/50 to-white to-50%'>

View File

@ -174,6 +174,20 @@ const ConfigContent: FC<Props> = ({
</div>
)
}
{
selectedDatasetsMode.mixtureInternalAndExternal && (
<div className='mt-4 system-xs-medium text-text-warning'>
{t('dataset.mixtureInternalAndExternalTip')}
</div>
)
}
{
selectedDatasetsMode.allExternal && (
<div className='mt-4 system-xs-medium text-text-warning'>
{t('dataset.allExternalTip')}
</div>
)
}
{
selectedDatasetsMode.mixtureHighQualityAndEconomic
&& (
@ -229,15 +243,15 @@ const ConfigContent: FC<Props> = ({
/>
)
}
<div className='ml-2 leading-[32px] text-[13px] font-medium text-gray-900'>{t('common.modelProvider.rerankModel.key')}</div>
<div className='leading-[32px] text-text-secondary system-sm-semibold'>{t('common.modelProvider.rerankModel.key')}</div>
<Tooltip
popupContent={
<div className="w-[200px]">
{t('common.modelProvider.rerankModel.tip')}
</div>
}
popupClassName='ml-0.5'
triggerClassName='ml-0.5 w-3.5 h-3.5'
popupClassName='ml-1'
triggerClassName='ml-1 w-4 h-4'
/>
</div>
<div>

View File

@ -39,13 +39,26 @@ const ParamsConfig = ({
useEffect(() => {
const {
allEconomic,
allHighQuality,
allHighQualityFullTextSearch,
allHighQualityVectorSearch,
allExternal,
mixtureHighQualityAndEconomic,
inconsistentEmbeddingModel,
mixtureInternalAndExternal,
} = getSelectedDatasetsMode(selectedDatasets)
const { datasets, retrieval_model, score_threshold_enabled, ...restConfigs } = datasetConfigs
let rerankEnable = restConfigs.reranking_enable
if (allEconomic && !restConfigs.reranking_model?.reranking_provider_name && rerankEnable === undefined)
if ((allEconomic && !restConfigs.reranking_model?.reranking_provider_name && rerankEnable === undefined) || allExternal)
rerankEnable = false
if (allEconomic || allHighQuality || allHighQualityFullTextSearch || allHighQualityVectorSearch || (allExternal && selectedDatasets.length === 1))
setRerankSettingModalOpen(false)
if (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || mixtureInternalAndExternal || (allExternal && selectedDatasets.length > 1))
setRerankSettingModalOpen(true)
setTempDataSetConfigs({
...getMultipleRetrievalConfig({
top_k: restConfigs.top_k,

View File

@ -47,7 +47,7 @@ const SelectDataSet: FC<ISelectDataSetProps> = ({
const { data, has_more } = await fetchDatasets({ url: '/datasets', params: { page } })
setPage(getPage() + 1)
setIsNoMore(!has_more)
const newList = [...(datasets || []), ...data.filter(item => item.indexing_technique)]
const newList = [...(datasets || []), ...data.filter(item => item.indexing_technique || item.provider === 'external')]
setDataSets(newList)
setLoaded(true)
if (!selected.find(item => !item.name))
@ -145,6 +145,11 @@ const SelectDataSet: FC<ISelectDataSetProps> = ({
/>
)
}
{
item.provider === 'external' && (
<Badge text={t('dataset.externalTag')} />
)
}
</div>
))}
</div>

View File

@ -5,8 +5,10 @@ import { useTranslation } from 'react-i18next'
import { isEqual } from 'lodash-es'
import { RiCloseLine } from '@remixicon/react'
import { BookOpenIcon } from '@heroicons/react/24/outline'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import cn from '@/utils/classnames'
import IndexMethodRadio from '@/app/components/datasets/settings/index-method-radio'
import Divider from '@/app/components/base/divider'
import Button from '@/app/components/base/button'
import type { DataSet } from '@/models/datasets'
import { useToastContext } from '@/app/components/base/toast'
@ -14,6 +16,7 @@ import { updateDatasetSetting } from '@/service/datasets'
import { useAppContext } from '@/context/app-context'
import { useModalContext } from '@/context/modal-context'
import type { RetrievalConfig } from '@/types/app'
import RetrievalSettings from '@/app/components/datasets/external-knowledge-base/create/RetrievalSettings'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import { ensureRerankModelSelected, isReRankModelSelected } from '@/app/components/datasets/common/check-rerank-model'
@ -56,6 +59,9 @@ const SettingsModal: FC<SettingsModalProps> = ({
const { t } = useTranslation()
const { notify } = useToastContext()
const ref = useRef(null)
const [topK, setTopK] = useState(currentDataset?.external_retrieval_model.top_k ?? 2)
const [scoreThreshold, setScoreThreshold] = useState(currentDataset?.external_retrieval_model.score_threshold ?? 0.5)
const [scoreThresholdEnabled, setScoreThresholdEnabled] = useState(currentDataset?.external_retrieval_model.score_threshold_enabled ?? false)
const { setShowAccountSettingModal } = useModalContext()
const [loading, setLoading] = useState(false)
@ -73,6 +79,15 @@ const SettingsModal: FC<SettingsModalProps> = ({
const [isHideChangedTip, setIsHideChangedTip] = useState(false)
const isRetrievalChanged = !isEqual(retrievalConfig, localeCurrentDataset?.retrieval_model_dict) || indexMethod !== localeCurrentDataset?.indexing_technique
// Apply a partial retrieval-settings change: only the fields present in `data`
// (i.e. not undefined) update their corresponding state.
const handleSettingsChange = (data: { top_k?: number; score_threshold?: number; score_threshold_enabled?: boolean }) => {
  const {
    top_k: nextTopK,
    score_threshold: nextScoreThreshold,
    score_threshold_enabled: nextScoreThresholdEnabled,
  } = data

  if (nextTopK !== undefined)
    setTopK(nextTopK)
  if (nextScoreThreshold !== undefined)
    setScoreThreshold(nextScoreThreshold)
  if (nextScoreThresholdEnabled !== undefined)
    setScoreThresholdEnabled(nextScoreThresholdEnabled)
}
const handleSave = async () => {
if (loading)
return
@ -107,10 +122,17 @@ const SettingsModal: FC<SettingsModalProps> = ({
description,
permission,
indexing_technique: indexMethod,
external_retrieval_model: {
top_k: topK,
score_threshold: scoreThreshold,
score_threshold_enabled: scoreThresholdEnabled,
},
retrieval_model: {
...postRetrievalConfig,
score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
},
external_knowledge_id: currentDataset!.external_knowledge_info.external_knowledge_id,
external_knowledge_api_id: currentDataset!.external_knowledge_info.external_knowledge_api_id,
embedding_model: localeCurrentDataset.embedding_model,
embedding_model_provider: localeCurrentDataset.embedding_model_provider,
},
@ -178,7 +200,7 @@ const SettingsModal: FC<SettingsModalProps> = ({
}}>
<div className={cn(rowClass, 'items-center')}>
<div className={labelClass}>
{t('datasetSettings.form.name')}
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.name')}</div>
</div>
<input
value={localeCurrentDataset.name}
@ -189,7 +211,7 @@ const SettingsModal: FC<SettingsModalProps> = ({
</div>
<div className={cn(rowClass)}>
<div className={labelClass}>
{t('datasetSettings.form.desc')}
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.desc')}</div>
</div>
<div className='w-full'>
<textarea
@ -206,7 +228,7 @@ const SettingsModal: FC<SettingsModalProps> = ({
</div>
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.permissions')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.permissions')}</div>
</div>
<div className='w-full'>
<PermissionSelector
@ -219,24 +241,25 @@ const SettingsModal: FC<SettingsModalProps> = ({
/>
</div>
</div>
<div className="w-full h-0 border-b-[0.5px] border-b-gray-200 my-2"></div>
<div className={cn(rowClass)}>
<div className={labelClass}>
{t('datasetSettings.form.indexMethod')}
{currentDataset && currentDataset.indexing_technique && (
<div className={cn(rowClass)}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.indexMethod')}</div>
</div>
<div className='grow'>
<IndexMethodRadio
disable={!localeCurrentDataset?.embedding_available}
value={indexMethod}
onChange={v => setIndexMethod(v!)}
itemClassName='sm:!w-[280px]'
/>
</div>
</div>
<div className='grow'>
<IndexMethodRadio
disable={!localeCurrentDataset?.embedding_available}
value={indexMethod}
onChange={v => setIndexMethod(v!)}
itemClassName='sm:!w-[280px]'
/>
</div>
</div>
)}
{indexMethod === 'high_quality' && (
<div className={cn(rowClass)}>
<div className={labelClass}>
{t('datasetSettings.form.embeddingModel')}
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.embeddingModel')}</div>
</div>
<div className='w-full'>
<div className='w-full h-9 rounded-lg bg-gray-100 opacity-60'>
@ -258,32 +281,75 @@ const SettingsModal: FC<SettingsModalProps> = ({
)}
{/* Retrieval Method Config */}
<div className={rowClass}>
<div className={cn(labelClass, 'w-auto min-w-[168px]')}>
<div>
<div>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.description')}
{currentDataset?.provider === 'external'
? <>
<div className={rowClass}><Divider/></div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
</div>
<RetrievalSettings
topK={topK}
scoreThreshold={scoreThreshold}
scoreThresholdEnabled={scoreThresholdEnabled}
onChange={handleSettingsChange}
isInRetrievalSetting={true}
/>
</div>
<div className={rowClass}><Divider/></div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.externalKnowledgeAPI')}</div>
</div>
<div className='w-full max-w-[480px]'>
<div className='flex h-full px-3 py-2 items-center gap-1 rounded-lg bg-components-input-bg-normal'>
<ApiConnectionMod className='w-4 h-4 text-text-secondary' />
<div className='overflow-hidden text-text-secondary text-ellipsis system-sm-medium'>
{currentDataset?.external_knowledge_info.external_knowledge_api_name}
</div>
<div className='text-text-tertiary system-xs-regular'>·</div>
<div className='text-text-tertiary system-xs-regular'>{currentDataset?.external_knowledge_info.external_knowledge_api_endpoint}</div>
</div>
</div>
</div>
</div>
<div>
{indexMethod === 'high_quality'
? (
<RetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)
: (
<EconomicalRetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)}
</div>
</div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.externalKnowledgeID')}</div>
</div>
<div className='w-full max-w-[480px]'>
<div className='flex h-full px-3 py-2 items-center gap-1 rounded-lg bg-components-input-bg-normal'>
<div className='text-text-tertiary system-xs-regular'>{currentDataset?.external_knowledge_info.external_knowledge_id}</div>
</div>
</div>
</div>
<div className={rowClass}><Divider/></div>
</>
: <div className={rowClass}>
<div className={cn(labelClass, 'w-auto min-w-[168px]')}>
<div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.description')}
</div>
</div>
</div>
<div>
{indexMethod === 'high_quality'
? (
<RetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)
: (
<EconomicalRetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)}
</div>
</div>}
</div>
{isRetrievalChanged && !isHideChangedTip && (
<div className='absolute z-10 left-[30px] right-[30px] bottom-[76px] flex h-10 items-center px-3 rounded-lg border border-[#FEF0C7] bg-[#FFFAEB] shadow-lg justify-between'>

View File

@ -0,0 +1,21 @@
import { Corner } from '../icons/src/vender/solid/shapes'
import cn from '@/utils/classnames'
type CornerLabelProps = {
  // Text shown inside the label.
  label: string
  // Extra classes merged onto the outer wrapper.
  className?: string
  // Extra classes merged onto the box that holds the text.
  labelClassName?: string
}

/**
 * Small label made of the `Corner` shape followed by a text box.
 * Both parts switch to the "burn" background colour while the wrapper is hovered
 * (via the `group/corner-label` Tailwind group).
 */
const CornerLabel: React.FC<CornerLabelProps> = (props) => {
  const { label, className, labelClassName } = props
  const wrapperCls = cn('group/corner-label inline-flex items-start', className)
  const textBoxCls = cn(
    'flex py-1 pr-2 items-center gap-0.5 bg-background-section group-hover/corner-label:bg-background-section-burn',
    labelClassName,
  )

  return (
    <div className={wrapperCls}>
      <Corner className='w-[13px] h-5 text-background-section group-hover/corner-label:text-background-section-burn' />
      <div className={textBoxCls}>
        <div className='text-text-tertiary system-2xs-medium-uppercase'>{label}</div>
      </div>
    </div>
  )
}
export default CornerLabel

View File

@ -0,0 +1,5 @@
<svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg">
<g id="Icon L">
<path id="Vector" fill-rule="evenodd" clip-rule="evenodd" d="M7.99996 3.33333C5.42263 3.33333 3.33329 5.42267 3.33329 8C3.33329 10.5773 5.42263 12.6667 7.99996 12.6667C9.72643 12.6667 11.2348 11.7295 12.0427 10.3329C12.227 10.0141 12.6349 9.90523 12.9536 10.0896C13.2723 10.274 13.3812 10.6818 13.1968 11.0005C12.1604 12.7921 10.2216 14 7.99996 14C4.91159 14 2.36821 11.6666 2.03658 8.66667H1.33329C0.965103 8.66667 0.666626 8.36819 0.666626 8C0.666626 7.63181 0.965103 7.33333 1.33329 7.33333H2.03658C2.36821 4.33337 4.91159 2 7.99996 2C10.2216 2 12.1604 3.20785 13.1968 4.99952C13.3812 5.31823 13.2723 5.72605 12.9536 5.91041C12.6349 6.09477 12.227 5.98585 12.0427 5.66714C11.2348 4.27054 9.72643 3.33333 7.99996 3.33333ZM7.99996 6C6.89539 6 5.99996 6.89543 5.99996 8C5.99996 9.10455 6.89539 10 7.99996 10C9.1045 10 9.99996 9.10454 9.99996 8C9.99996 6.89543 9.10451 6 7.99996 6ZM4.66663 8C4.66663 6.15905 6.15901 4.66667 7.99996 4.66667C9.61257 4.66667 10.9578 5.81184 11.2666 7.33333H14.6666C15.0348 7.33333 15.3333 7.63181 15.3333 8C15.3333 8.36819 15.0348 8.66667 14.6666 8.66667H11.2666C10.9578 10.1881 9.61257 11.3333 7.99996 11.3333C6.159 11.3333 4.66663 9.84092 4.66663 8Z" fill="#354052"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.3 KiB

View File

@ -0,0 +1,3 @@
<svg width="13" height="20" viewBox="0 0 13 20" fill="none" xmlns="http://www.w3.org/2000/svg">
<path id="Shape" d="M0 0H13V20C9.98017 20 7.26458 18.1615 6.14305 15.3576L0 0Z" fill="#F9FAFB"/>
</svg>

After

Width:  |  Height:  |  Size: 200 B

View File

@ -0,0 +1,38 @@
{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"width": "16",
"height": "16",
"viewBox": "0 0 16 16",
"fill": "none",
"xmlns": "http://www.w3.org/2000/svg"
},
"children": [
{
"type": "element",
"name": "g",
"attributes": {
"id": "Icon L"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"id": "Vector",
"fill-rule": "evenodd",
"clip-rule": "evenodd",
"d": "M7.99996 3.33333C5.42263 3.33333 3.33329 5.42267 3.33329 8C3.33329 10.5773 5.42263 12.6667 7.99996 12.6667C9.72643 12.6667 11.2348 11.7295 12.0427 10.3329C12.227 10.0141 12.6349 9.90523 12.9536 10.0896C13.2723 10.274 13.3812 10.6818 13.1968 11.0005C12.1604 12.7921 10.2216 14 7.99996 14C4.91159 14 2.36821 11.6666 2.03658 8.66667H1.33329C0.965103 8.66667 0.666626 8.36819 0.666626 8C0.666626 7.63181 0.965103 7.33333 1.33329 7.33333H2.03658C2.36821 4.33337 4.91159 2 7.99996 2C10.2216 2 12.1604 3.20785 13.1968 4.99952C13.3812 5.31823 13.2723 5.72605 12.9536 5.91041C12.6349 6.09477 12.227 5.98585 12.0427 5.66714C11.2348 4.27054 9.72643 3.33333 7.99996 3.33333ZM7.99996 6C6.89539 6 5.99996 6.89543 5.99996 8C5.99996 9.10455 6.89539 10 7.99996 10C9.1045 10 9.99996 9.10454 9.99996 8C9.99996 6.89543 9.10451 6 7.99996 6ZM4.66663 8C4.66663 6.15905 6.15901 4.66667 7.99996 4.66667C9.61257 4.66667 10.9578 5.81184 11.2666 7.33333H14.6666C15.0348 7.33333 15.3333 7.63181 15.3333 8C15.3333 8.36819 15.0348 8.66667 14.6666 8.66667H11.2666C10.9578 10.1881 9.61257 11.3333 7.99996 11.3333C6.159 11.3333 4.66663 9.84092 4.66663 8Z",
"fill": "currentColor"
},
"children": []
}
]
}
]
},
"name": "ApiConnectionMod"
}

View File

@ -0,0 +1,16 @@
// GENERATE BY script
// DON NOT EDIT IT MANUALLY
import * as React from 'react'
import data from './ApiConnectionMod.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
props,
ref,
) => <IconBase {...props} ref={ref} data={data as IconData} />)
Icon.displayName = 'ApiConnectionMod'
export default Icon

View File

@ -1,3 +1,4 @@
export { default as ApiConnectionMod } from './ApiConnectionMod'
export { default as ApiConnection } from './ApiConnection'
export { default as BarChartSquare02 } from './BarChartSquare02'
export { default as Container } from './Container'

View File

@ -0,0 +1,27 @@
{
"icon": {
"type": "element",
"isRootNode": true,
"name": "svg",
"attributes": {
"width": "13",
"height": "20",
"viewBox": "0 0 13 20",
"fill": "none",
"xmlns": "http://www.w3.org/2000/svg"
},
"children": [
{
"type": "element",
"name": "path",
"attributes": {
"id": "Shape",
"d": "M0 0H13V20C9.98017 20 7.26458 18.1615 6.14305 15.3576L0 0Z",
"fill": "currentColor"
},
"children": []
}
]
},
"name": "Corner"
}

View File

@ -0,0 +1,16 @@
// GENERATE BY script
// DON NOT EDIT IT MANUALLY
import * as React from 'react'
import data from './Corner.json'
import IconBase from '@/app/components/base/icons/IconBase'
import type { IconBaseProps, IconData } from '@/app/components/base/icons/IconBase'
const Icon = React.forwardRef<React.MutableRefObject<SVGElement>, Omit<IconBaseProps, 'data'>>((
props,
ref,
) => <IconBase {...props} ref={ref} data={data as IconData} />)
Icon.displayName = 'Corner'
export default Icon

View File

@ -1,2 +1,3 @@
export { default as Corner } from './Corner'
export { default as Star04 } from './Star04'
export { default as Star06 } from './Star06'

View File

@ -3,5 +3,5 @@
}
.modal-panel {
@apply w-full max-w-md transform rounded-2xl bg-white p-6 text-left align-middle shadow-xl transition-all;
@apply w-full max-w-[480px] transform rounded-2xl bg-white p-6 text-left align-middle shadow-xl transition-all;
}

View File

@ -37,6 +37,7 @@ const ParamItem: FC<Props> = ({ className, id, name, noTooltip, tip, step = 0.1,
<span className="mx-1 text-gray-900 text-[13px] leading-[18px] font-medium">{name}</span>
{!noTooltip && (
<Tooltip
triggerClassName='w-4 h-4 shrink-0'
popupContent={<div className="w-[200px]">{tip}</div>}
/>
)}

View File

@ -87,7 +87,7 @@ const Select: FC<ISelectProps> = ({
<div className='group text-gray-800'>
{allowSearch
? <Combobox.Input
className={`w-full rounded-lg border-0 ${bgClassName} py-1.5 pl-3 pr-10 shadow-sm sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-not-allowed`}
className={`w-full rounded-lg border-0 ${bgClassName} py-1.5 pl-3 pr-10 shadow-sm sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 ${disabled ? 'cursor-not-allowed' : 'cursor-pointer'}`}
onChange={(event) => {
if (!disabled)
setQuery(event.target.value)

View File

@ -36,6 +36,12 @@ export type UsageScene = 'doc' | 'hitTesting'
type ISegmentCardProps = {
loading: boolean
detail?: SegmentDetailModel & { document: { name: string } }
contentExternal?: string
refSource?: {
title: string
uri: string
}
isExternal?: boolean
score?: number
onClick?: () => void
onChangeSwitch?: (segId: string, enabled: boolean) => Promise<void>
@ -48,6 +54,9 @@ type ISegmentCardProps = {
const SegmentCard: FC<ISegmentCardProps> = ({
detail = {},
contentExternal,
isExternal,
refSource,
score,
onClick,
onChangeSwitch,
@ -88,6 +97,9 @@ const SegmentCard: FC<ISegmentCardProps> = ({
)
}
if (contentExternal)
return contentExternal
return content
}
@ -199,16 +211,16 @@ const SegmentCard: FC<ISegmentCardProps> = ({
</div>
<div className={cn('w-full bg-gray-50 group-hover:bg-white')}>
<Divider />
<div className="relative flex items-center w-full">
<div className="relative flex items-center w-full pb-1">
<DocumentTitle
name={detail?.document?.name || ''}
extension={(detail?.document?.name || '').split('.').pop() || 'txt'}
name={detail?.document?.name || refSource?.title || ''}
extension={(detail?.document?.name || refSource?.title || '').split('.').pop() || 'txt'}
wrapperCls='w-full'
iconCls="!h-4 !w-4 !bg-contain"
textCls="text-xs text-gray-700 !font-normal overflow-hidden whitespace-nowrap text-ellipsis"
/>
<div className={cn(s.chartLinkText, 'group-hover:inline-flex')}>
{t('datasetHitTesting.viewChart')}
{isExternal ? t('datasetHitTesting.viewDetail') : t('datasetHitTesting.viewChart')}
<ArrowUpRightIcon className="w-3 h-3 ml-1 stroke-current stroke-2" />
</div>
</div>

View File

@ -0,0 +1,16 @@
/**
 * Form value / request body describing one external knowledge API.
 * The add/edit modal keeps its state in this shape and sends it as the
 * `body` of the create request.
 */
export type CreateExternalAPIReq = {
// Display name of the API entry.
name: string
// Connection settings; every key here is edited through the form's non-`name` fields.
settings: {
// URL of the external knowledge API.
endpoint: string
// Credential for the endpoint; the modal rejects non-empty keys shorter than 5 characters.
api_key: string
}
}
/**
 * Description of a single field rendered by the external API form.
 */
export type FormSchema = {
// Field key: used as the input's id/name; 'name' maps to the top-level value,
// anything else is read from / written to `settings[variable]`.
variable: string
// 'secret' fields are rendered as password inputs, 'text' as plain text inputs.
type: 'text' | 'secret'
// Label text keyed by language code; the form falls back to `en_US`.
label: {
[key: string]: string
}
// When true the label shows a red asterisk and the input is marked required.
required: boolean
}

View File

@ -0,0 +1,90 @@
import React, { useState } from 'react'
import type { FC } from 'react'
import { useTranslation } from 'react-i18next'
import { RiBookOpenLine } from '@remixicon/react'
import type { CreateExternalAPIReq, FormSchema } from '../declarations'
import Input from '@/app/components/base/input'
import cn from '@/utils/classnames'
type FormProps = {
  // Extra classes for the <form> element.
  className?: string
  // Extra classes for each field wrapper.
  itemClassName?: string
  // Extra classes for each field label.
  fieldLabelClassName?: string
  // Current form value (controlled by the parent).
  value: CreateExternalAPIReq
  // Called with a new value object on every keystroke.
  onChange: (val: CreateExternalAPIReq) => void
  // Fields to render, in order.
  formSchemas: FormSchema[]
  // Extra classes for each input.
  inputClassName?: string
}

/**
 * Schema-driven, controlled form for an external knowledge API.
 *
 * The `name` field is stored at the top level of `value`; every other field is
 * stored under `value.settings`. The component never mutates `value`: each change
 * produces a fresh object that is handed to `onChange`.
 */
const Form: FC<FormProps> = React.memo(({
  className,
  itemClassName,
  fieldLabelClassName,
  value,
  onChange,
  formSchemas,
  inputClassName,
}) => {
  const { t, i18n } = useTranslation()
  // NOTE(review): `changeKey` is written but never read in this component. It is kept
  // only so the file-level `useState` import stays in use; consider removing both together.
  const [changeKey, setChangeKey] = useState('')

  // Route an edit either to the top-level `name` or into `settings`.
  const handleFormChange = (key: string, val: string) => {
    setChangeKey(key)
    if (key === 'name') {
      onChange({ ...value, [key]: val })
    }
    else {
      onChange({
        ...value,
        settings: {
          ...value.settings,
          [key]: val,
        },
      })
    }
  }

  // Render label (+ documentation link for the endpoint field) and input for one schema entry.
  const renderField = (formSchema: FormSchema) => {
    const { variable, type, label, required } = formSchema
    const fieldValue = variable === 'name' ? value[variable] : (value.settings[variable as keyof typeof value.settings] || '')

    return (
      <div key={variable} className={cn(itemClassName, 'flex flex-col items-start gap-1 self-stretch')}>
        <div className="flex justify-between items-center w-full">
          <label className={cn(fieldLabelClassName, 'text-text-secondary system-sm-semibold')} htmlFor={variable}>
            {label[i18n.language] || label.en_US}
            {required && <span className='ml-1 text-red-500'>*</span>}
          </label>
          {variable === 'endpoint' && (
            <a
              href='https://docs.dify.ai/guides/knowledge-base/external-knowledge-api-documentation'
              target='_blank'
              rel='noopener noreferrer'
              className='text-text-accent body-xs-regular flex items-center'
            >
              <RiBookOpenLine className='w-3 h-3 text-text-accent mr-1' />
              {t('dataset.externalAPIPanelDocumentation')}
            </a>
          )}
        </div>
        <Input
          type={type === 'secret' ? 'password' : 'text'}
          id={variable}
          name={variable}
          value={fieldValue}
          onChange={val => handleFormChange(variable, val.target.value)}
          required={required}
          className={cn(inputClassName)}
        />
      </div>
    )
  }

  return (
    <form
      className={cn('flex flex-col justify-center items-start gap-4 self-stretch', className)}
      // The form has no action; saving is driven by the parent. Block native submission
      // (e.g. Enter in a single-field schema) so it can never reload the page.
      onSubmit={e => e.preventDefault()}
    >
      {formSchemas.map(formSchema => renderField(formSchema))}
    </form>
  )
})
// React.memo around an anonymous arrow function leaves the component unnamed in DevTools.
Form.displayName = 'Form'
export default Form

View File

@ -0,0 +1,218 @@
import type { FC } from 'react'
import {
memo,
useEffect,
useState,
} from 'react'
import { useTranslation } from 'react-i18next'
import {
RiBook2Line,
RiCloseLine,
RiInformation2Line,
RiLock2Fill,
} from '@remixicon/react'
import type { CreateExternalAPIReq, FormSchema } from '../declarations'
import Form from './Form'
import ActionButton from '@/app/components/base/action-button'
import Confirm from '@/app/components/base/confirm'
import {
PortalToFollowElem,
PortalToFollowElemContent,
} from '@/app/components/base/portal-to-follow-elem'
import { createExternalAPI } from '@/service/datasets'
import { useToastContext } from '@/app/components/base/toast'
import Button from '@/app/components/base/button'
import Tooltip from '@/app/components/base/tooltip'
// Props of the add/edit external knowledge API modal.
type AddExternalAPIModalProps = {
// Initial form value; only copied into the form when `isEditMode` is true.
data?: CreateExternalAPIReq
// Called with the create response after a successful create.
onSave: (formValue: CreateExternalAPIReq) => void
// Called by the close/cancel buttons and after a successful save.
onCancel: () => void
// Performs the update in edit mode.
onEdit?: (formValue: CreateExternalAPIReq) => Promise<void>
// Knowledge bases bound to this API; a non-empty list triggers the warning text and a confirm dialog in edit mode.
datasetBindings?: { id: string; name: string }[]
// Switches between "create" and "edit" title and save behaviour.
isEditMode: boolean
}
// Fields rendered by the modal's form, in display order.
// Only `en_US` labels are defined here, so every language falls back to them.
const formSchemas: FormSchema[] = [
{
// Stored at the top level of the form value.
variable: 'name',
type: 'text',
label: {
en_US: 'Name',
},
required: true,
},
{
// Stored under `settings.endpoint`.
variable: 'endpoint',
type: 'text',
label: {
en_US: 'API Endpoint',
},
required: true,
},
{
// Stored under `settings.api_key`; 'secret' makes the form render a password input.
variable: 'api_key',
type: 'secret',
label: {
en_US: 'API Key',
},
required: true,
},
]
/**
 * Modal for creating or editing an external knowledge API.
 *
 * - Create mode: posts the form via `createExternalAPI`, then calls `onSave` with the response.
 * - Edit mode: delegates to `onEdit`; when the API is bound to knowledge bases the user must
 *   confirm first.
 * The Save button stays disabled while any field is blank or a request is in flight.
 */
const AddExternalAPIModal: FC<AddExternalAPIModalProps> = ({ data, onSave, onCancel, datasetBindings, isEditMode, onEdit }) => {
  const { t } = useTranslation()
  const { notify } = useToastContext()
  const [loading, setLoading] = useState(false)
  const [showConfirm, setShowConfirm] = useState(false)
  const [formData, setFormData] = useState<CreateExternalAPIReq>({ name: '', settings: { endpoint: '', api_key: '' } })

  // Pre-fill the form when editing an existing entry.
  useEffect(() => {
    if (isEditMode && data)
      setFormData(data)
  }, [isEditMode, data])

  // A field counts as blank when it is missing or contains only whitespace.
  // The payload comes from the server in edit mode, so a field may be absent at runtime
  // even though the type says `string`; calling `.trim()` blindly would throw during render.
  const isBlankField = (v: unknown): boolean =>
    v === null || v === undefined || (typeof v === 'string' && v.trim() === '')

  const hasEmptyInputs = Object.values(formData).some(value =>
    value !== null && typeof value === 'object'
      ? Object.values(value).some(isBlankField)
      : isBlankField(value),
  )

  const handleDataChange = (val: CreateExternalAPIReq) => {
    setFormData(val)
  }

  // Validate, then create or update; closes the modal (via `onCancel`) on success.
  const handleSave = async () => {
    const apiKey = formData.settings.api_key
    if (apiKey && apiKey.length < 5) {
      notify({ type: 'error', message: t('common.apiBasedExtension.modal.apiKey.lengthError') })
      return
    }
    try {
      setLoading(true)
      if (isEditMode && onEdit) {
        // NOTE(review): any non-empty key is replaced by the '[__HIDDEN__]' placeholder here,
        // so a key edited by the user is not sent on this path. Confirm this is intended.
        await onEdit(
          {
            ...formData,
            settings: { ...formData.settings, api_key: formData.settings.api_key ? '[__HIDDEN__]' : formData.settings.api_key },
          },
        )
        notify({ type: 'success', message: 'External API updated successfully' })
      }
      else {
        const res = await createExternalAPI({ body: formData })
        if (res && res.id) {
          notify({ type: 'success', message: 'External API saved successfully' })
          onSave(res)
        }
      }
      onCancel()
    }
    catch (error) {
      console.error('Error saving/updating external API:', error)
      notify({ type: 'error', message: 'Failed to save/update External API' })
    }
    finally {
      setLoading(false)
    }
  }

  return (
    <PortalToFollowElem open>
      <PortalToFollowElemContent className='w-full h-full z-[60]'>
        <div className='fixed inset-0 flex items-center justify-center bg-black/[.25]'>
          <div className='flex relative w-[480px] flex-col items-start bg-components-panel-bg rounded-2xl border-[0.5px] border-components-panel-border shadows-shadow-xl'>
            <div className='flex flex-col pt-6 pl-6 pb-3 pr-14 items-start gap-2 self-stretch'>
              <div className='self-stretch text-text-primary title-2xl-semi-bold flex-grow'>
                {
                  isEditMode ? t('dataset.editExternalAPIFormTitle') : t('dataset.createExternalAPI')
                }
              </div>
              {isEditMode && (datasetBindings?.length ?? 0) > 0 && (
                <div className='text-text-tertiary system-xs-regular flex items-center'>
                  {t('dataset.editExternalAPIFormWarning.front')}
                  <span className='text-text-accent cursor-pointer flex items-center'>
                    &nbsp;{datasetBindings?.length} {t('dataset.editExternalAPIFormWarning.end')}&nbsp;
                    <Tooltip
                      popupClassName='flex items-center self-stretch w-[320px]'
                      popupContent={
                        <div className='p-1'>
                          <div className='flex pt-1 pb-0.5 pl-2 pr-3 items-start self-stretch'>
                            <div className='text-text-tertiary system-xs-medium-uppercase'>{`${datasetBindings?.length} ${t('dataset.editExternalAPITooltipTitle')}`}</div>
                          </div>
                          {datasetBindings?.map(binding => (
                            <div key={binding.id} className='flex px-2 py-1 items-center gap-1 self-stretch'>
                              <RiBook2Line className='w-4 h-4 text-text-secondary' />
                              <div className='text-text-secondary system-sm-medium'>{binding.name}</div>
                            </div>
                          ))}
                        </div>
                      }
                      asChild={false}
                      position='bottom'
                    >
                      <RiInformation2Line className='w-3.5 h-3.5' />
                    </Tooltip>
                  </span>
                </div>
              )}
            </div>
            <ActionButton className='absolute top-5 right-5' onClick={onCancel}>
              <RiCloseLine className='w-[18px] h-[18px] text-text-tertiary flex-shrink-0' />
            </ActionButton>
            <Form
              value={formData}
              onChange={handleDataChange}
              formSchemas={formSchemas}
              className='flex px-6 py-3 flex-col justify-center items-start gap-4 self-stretch'
            />
            <div className='flex p-6 pt-5 justify-end items-center gap-2 self-stretch'>
              <Button type='button' variant='secondary' onClick={onCancel}>
                {t('dataset.externalAPIForm.cancel')}
              </Button>
              <Button
                type='submit'
                variant='primary'
                onClick={() => {
                  // Editing an API that is in use requires an explicit confirmation first.
                  if (isEditMode && (datasetBindings?.length ?? 0) > 0)
                    setShowConfirm(true)
                  // NOTE(review): this path calls `onEdit` directly, so it skips the key-length
                  // check, the loading state and the toasts that `handleSave` provides.
                  // Confirm whether the caller's `onEdit` wrapper is expected to cover those.
                  else if (isEditMode && onEdit)
                    onEdit(formData)
                  else
                    handleSave()
                }}
                disabled={hasEmptyInputs || loading}
              >
                {t('dataset.externalAPIForm.save')}
              </Button>
            </div>
            <div className='flex px-2 py-3 justify-center items-center gap-1 self-stretch rounded-b-2xl border-t-[0.5px] border-divider-subtle bg-background-soft text-text-tertiary system-xs-regular'>
              <RiLock2Fill className='w-3 h-3 text-text-quaternary' />
              {t('dataset.externalAPIForm.encrypted.front')}
              <a
                className='text-text-accent'
                target='_blank' rel='noopener noreferrer'
                href='https://pycryptodome.readthedocs.io/en/latest/src/cipher/oaep.html'
              >
                PKCS1_OAEP
              </a>
              {t('dataset.externalAPIForm.encrypted.end')}
            </div>
          </div>
          {showConfirm && (datasetBindings?.length ?? 0) > 0 && (
            <Confirm
              isShow={showConfirm}
              type='warning'
              title='Warning'
              content={`${t('dataset.editExternalAPIConfirmWarningContent.front')} ${datasetBindings?.length} ${t('dataset.editExternalAPIConfirmWarningContent.end')}`}
              onCancel={() => setShowConfirm(false)}
              onConfirm={handleSave}
            />
          )}
        </div>
      </PortalToFollowElemContent>
    </PortalToFollowElem>
  )
}
export default memo(AddExternalAPIModal)

View File

@ -0,0 +1,90 @@
import React from 'react'
import {
RiAddLine,
RiBookOpenLine,
RiCloseLine,
} from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import ExternalKnowledgeAPICard from '../external-knowledge-api-card'
import cn from '@/utils/classnames'
import { useExternalKnowledgeApi } from '@/context/external-knowledge-api-context'
import ActionButton from '@/app/components/base/action-button'
import Button from '@/app/components/base/button'
import Loading from '@/app/components/base/loading'
import { useModalContext } from '@/context/modal-context'
type ExternalAPIPanelProps = {
  // Invoked when the panel's close button is clicked.
  onClose: () => void
}

/**
 * Side panel listing the workspace's external knowledge APIs, with a button
 * that opens the "create external API" modal.
 */
const ExternalAPIPanel: React.FC<ExternalAPIPanelProps> = ({ onClose }) => {
  const { t } = useTranslation()
  const { setShowExternalKnowledgeAPIModal } = useModalContext()
  const { externalKnowledgeApiList, mutateExternalKnowledgeApis, isLoading } = useExternalKnowledgeApi()

  // Open the modal in create mode; the list is revalidated whether the user saves or cancels.
  const handleOpenExternalAPIModal = () => {
    setShowExternalKnowledgeAPIModal({
      payload: { name: '', settings: { endpoint: '', api_key: '' } },
      datasetBindings: [],
      onSaveCallback: () => {
        mutateExternalKnowledgeApis()
      },
      onCancelCallback: () => {
        mutateExternalKnowledgeApis()
      },
      isEditMode: false,
    })
  }

  return (
    <div
      tabIndex={-1}
      className={cn('absolute top-14 right-0 bottom-2 flex z-10 outline-none')}
    >
      <div
        className={cn(
          'relative flex flex-col w-[420px] bg-components-panel-bg-alt rounded-l-2xl h-full border border-components-panel-border',
        )}
      >
        <div className='flex items-start self-stretch p-4 pb-0'>
          <div className='flex flex-col items-start gap-1 flex-grow'>
            <div className='self-stretch text-text-primary system-xl-semibold'>{t('dataset.externalAPIPanelTitle')}</div>
            <div className='self-stretch text-text-tertiary body-xs-regular'>{t('dataset.externalAPIPanelDescription')}</div>
            {/* rel added so the new tab gets neither window.opener nor the referrer, matching the other external links of this feature */}
            <a className='flex justify-center items-center gap-1 self-stretch cursor-pointer' href='https://docs.dify.ai/guides/knowledge-base/external-knowledge-api-documentation' target='_blank' rel='noopener noreferrer'>
              <RiBookOpenLine className='w-3 h-3 text-text-accent' />
              <div className='flex-grow text-text-accent body-xs-regular'>{t('dataset.externalAPIPanelDocumentation')}</div>
            </a>
          </div>
          <div className='flex items-center'>
            <ActionButton onClick={() => onClose()}>
              <RiCloseLine className='w-4 h-4 text-text-tertiary' />
            </ActionButton>
          </div>
        </div>
        <div className='flex px-4 py-3 flex-col justify-center items-start gap-2 self-stretch'>
          <Button
            variant={'primary'}
            className='flex justify-center items-center px-3 py-2 gap-0.5'
            onClick={handleOpenExternalAPIModal}
          >
            <RiAddLine className='w-4 h-4 text-components-button-primary-text' />
            <div className='text-components-button-primary-text system-sm-medium'>{t('dataset.createExternalAPI')}</div>
          </Button>
        </div>
        <div className='flex py-0 px-4 flex-col items-start gap-1 flex-grow self-stretch'>
          {isLoading
            ? (
              <Loading />
            )
            : (
              externalKnowledgeApiList.map(api => (
                <ExternalKnowledgeAPICard key={api.id} api={api} />
              ))
            )}
        </div>
      </div>
    </div>
  )
}
}
export default ExternalAPIPanel

View File

@ -0,0 +1,151 @@
import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'
import {
RiDeleteBinLine,
RiEditLine,
} from '@remixicon/react'
import type { CreateExternalAPIReq } from '../declarations'
import type { ExternalAPIItem } from '@/models/datasets'
import { checkUsageExternalAPI, deleteExternalAPI, fetchExternalAPI, updateExternalAPI } from '@/service/datasets'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import { useExternalKnowledgeApi } from '@/context/external-knowledge-api-context'
import { useModalContext } from '@/context/modal-context'
import ActionButton from '@/app/components/base/action-button'
import Confirm from '@/app/components/base/confirm'
type ExternalKnowledgeAPICardProps = {
  // The external knowledge API entry shown by this card.
  api: ExternalAPIItem
}

/**
 * Card for one external knowledge API showing its name and endpoint, with
 * edit and delete actions. Deleting first checks how many knowledge bases use
 * the API and asks for confirmation.
 */
const ExternalKnowledgeAPICard: React.FC<ExternalKnowledgeAPICardProps> = ({ api }) => {
  const { setShowExternalKnowledgeAPIModal } = useModalContext()
  const [showConfirm, setShowConfirm] = useState(false)
  // True while the pointer is over the delete button; tints the whole card.
  const [isHovered, setIsHovered] = useState(false)
  const [usageCount, setUsageCount] = useState(0)
  const { mutateExternalKnowledgeApis } = useExternalKnowledgeApi()
  const { t } = useTranslation()

  // Fetch the latest data for this API and open the modal in edit mode.
  const handleEditClick = async () => {
    try {
      const response = await fetchExternalAPI({ apiTemplateId: api.id })
      const formValue: CreateExternalAPIReq = {
        name: response.name,
        settings: {
          endpoint: response.settings.endpoint,
          api_key: response.settings.api_key,
        },
      }

      setShowExternalKnowledgeAPIModal({
        payload: formValue,
        onSaveCallback: () => {
          mutateExternalKnowledgeApis()
        },
        onCancelCallback: () => {
          mutateExternalKnowledgeApis()
        },
        isEditMode: true,
        datasetBindings: response.dataset_bindings,
        onEditCallback: async (updatedData: CreateExternalAPIReq) => {
          // NOTE(review): a failed update is only logged here and not re-thrown, so the
          // caller cannot tell success from failure. Confirm whether that is intended.
          try {
            await updateExternalAPI({
              apiTemplateId: api.id,
              body: {
                ...response,
                name: updatedData.name,
                settings: {
                  ...response.settings,
                  endpoint: updatedData.settings.endpoint,
                  api_key: updatedData.settings.api_key,
                },
              },
            })
            mutateExternalKnowledgeApis()
          }
          catch (error) {
            console.error('Error updating external knowledge API:', error)
          }
        },
      })
    }
    catch (error) {
      console.error('Error fetching external knowledge API data:', error)
    }
  }

  // Look up current usage, then show the confirmation dialog.
  const handleDeleteClick = async () => {
    try {
      const usage = await checkUsageExternalAPI({ apiTemplateId: api.id })
      // Always overwrite the count: if an earlier check found usages and this one does not,
      // the dialog must not keep showing the old number.
      setUsageCount(usage.is_using ? usage.count : 0)
      setShowConfirm(true)
    }
    catch (error) {
      console.error('Error checking external API usage:', error)
    }
  }

  // Delete the API; the dialog is closed and the list refreshed only on success.
  const handleConfirmDelete = async () => {
    try {
      const response = await deleteExternalAPI({ apiTemplateId: api.id })
      if (response && response.result === 'success') {
        setShowConfirm(false)
        mutateExternalKnowledgeApis()
      }
      else {
        console.error('Failed to delete external API')
      }
    }
    catch (error) {
      console.error('Error deleting external knowledge API:', error)
    }
  }

  return (
    <>
      <div className={`flex p-2 pl-3 items-start self-stretch rounded-lg border-[0.5px]
        border-components-panel-border-subtle bg-components-panel-on-panel-item-bg
        shadows-shadow-xs ${isHovered ? 'bg-state-destructive-hover border-state-destructive-border' : ''}`}
      >
        <div className='flex py-1 flex-col justify-center items-start gap-1.5 flex-grow'>
          <div className='flex items-center gap-1 self-stretch text-text-secondary'>
            <ApiConnectionMod className='w-4 h-4' />
            <div className='system-sm-medium'>{api.name}</div>
          </div>
          <div className='self-stretch text-text-tertiary system-xs-regular'>{api.settings.endpoint}</div>
        </div>
        <div className='flex items-start gap-1'>
          <ActionButton onClick={handleEditClick}>
            <RiEditLine className='w-4 h-4 text-text-tertiary hover:text-text-secondary' />
          </ActionButton>
          <ActionButton
            className='hover:bg-state-destructive-hover'
            onClick={handleDeleteClick}
            onMouseEnter={() => setIsHovered(true)}
            onMouseLeave={() => setIsHovered(false)}
          >
            <RiDeleteBinLine className='w-4 h-4 text-text-tertiary hover:text-text-destructive' />
          </ActionButton>
        </div>
      </div>
      {showConfirm && (
        <Confirm
          isShow={showConfirm}
          title={`${t('dataset.deleteExternalAPIConfirmWarningContent.title.front')} ${api.name}${t('dataset.deleteExternalAPIConfirmWarningContent.title.end')}`}
          content={
            usageCount > 0
              ? `${t('dataset.deleteExternalAPIConfirmWarningContent.content.front')} ${usageCount} ${t('dataset.deleteExternalAPIConfirmWarningContent.content.end')}`
              : t('dataset.deleteExternalAPIConfirmWarningContent.noConnectionContent')
          }
          type='warning'
          onConfirm={handleConfirmDelete}
          onCancel={() => setShowConfirm(false)}
        />
      )}
    </>
  )
}
export default ExternalKnowledgeAPICard

View File

@ -0,0 +1,36 @@
'use client'
import React, { useState } from 'react'
import { useRouter } from 'next/navigation'
import { useToastContext } from '@/app/components/base/toast'
import ExternalKnowledgeBaseCreate from '@/app/components/datasets/external-knowledge-base/create'
import type { CreateKnowledgeBaseReq } from '@/app/components/datasets/external-knowledge-base/create/declarations'
import { createExternalKnowledgeBase } from '@/service/datasets'
/**
 * Page-level wrapper around the "connect external knowledge base" form.
 * Submits the form value, shows a toast with the outcome and navigates back on success.
 */
const ExternalKnowledgeBaseConnector = () => {
  const { notify } = useToastContext()
  const [loading, setLoading] = useState(false)
  const router = useRouter()

  // Create the knowledge base; a response without an id is treated as a failure.
  const handleConnect = async (formValue: CreateKnowledgeBaseReq) => {
    try {
      setLoading(true)
      const created = await createExternalKnowledgeBase({ body: formValue })
      if (!created?.id)
        throw new Error('Failed to create external knowledge base')

      notify({ type: 'success', message: 'External Knowledge Base Connected Successfully' })
      router.back()
    }
    catch (error) {
      console.error('Error creating external knowledge base:', error)
      notify({ type: 'error', message: 'Failed to connect External Knowledge Base' })
    }
    setLoading(false)
  }

  return <ExternalKnowledgeBaseCreate onConnect={handleConnect} loading={loading} />
}
export default ExternalKnowledgeBaseConnector

View File

@ -0,0 +1,110 @@
import React, { useEffect, useState } from 'react'
import {
RiAddLine,
RiArrowDownSLine,
} from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import { useRouter } from 'next/navigation'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import { useModalContext } from '@/context/modal-context'
import { useExternalKnowledgeApi } from '@/context/external-knowledge-api-context'
// One selectable external knowledge API in the dropdown.
type ApiItem = {
// Identifier matched against the `value` prop and used as the React key.
value: string
// Name shown for the option and for the current selection.
name: string
// Shown next to the name in the option list.
url: string
}
// Props of the external knowledge API dropdown.
type ExternalApiSelectProps = {
// Options to list.
items: ApiItem[]
// `ApiItem.value` of the option to show as selected, if any.
value?: string
// Called with the chosen option when the user picks one.
onSelect: (item: ApiItem) => void
}
/**
 * Dropdown for choosing one of the configured external knowledge APIs.
 *
 * Shows the currently selected API (or a placeholder), lists every entry of
 * `items` while open, and ends with an action that opens the
 * "add external knowledge API" modal.
 *
 * @param items    Selectable APIs; each item's `value` doubles as its React key.
 * @param value    `value` of the item to display as selected, if any.
 * @param onSelect Invoked with the picked item; the list is closed afterwards.
 */
const ExternalApiSelect: React.FC<ExternalApiSelectProps> = ({ items, value, onSelect }) => {
  const { t } = useTranslation()
  const [isOpen, setIsOpen] = useState(false)
  const [selectedItem, setSelectedItem] = useState<ApiItem | null>(
    items.find(item => item.value === value) || null,
  )
  const { setShowExternalKnowledgeAPIModal } = useModalContext()
  const { mutateExternalKnowledgeApis } = useExternalKnowledgeApi()
  const router = useRouter()

  // Re-derive the displayed item whenever the controlled value or the list changes.
  useEffect(() => {
    const newSelectedItem = items.find(item => item.value === value) || null
    setSelectedItem(newSelectedItem)
  }, [value, items])

  // Open the creation modal with an empty payload; the API list is revalidated
  // on both save and cancel.
  const handleAddNewAPI = () => {
    setShowExternalKnowledgeAPIModal({
      payload: { name: '', settings: { endpoint: '', api_key: '' } },
      onSaveCallback: async () => {
        mutateExternalKnowledgeApis()
        router.refresh()
      },
      onCancelCallback: () => {
        mutateExternalKnowledgeApis()
      },
      isEditMode: false,
    })
  }

  const handleSelect = (item: ApiItem) => {
    setSelectedItem(item)
    onSelect(item)
    setIsOpen(false)
  }

  return (
    <div className="relative w-full">
      <div
        // Use a ternary rather than `isOpen && '…'`: the latter interpolates the
        // literal string "false" into the class list while the dropdown is closed.
        className={`flex items-center justify-between cursor-pointer px-2 py-1 gap-0.5 self-stretch rounded-lg
bg-components-input-bg-normal hover:bg-state-base-hover-alt ${isOpen ? 'bg-state-base-hover-alt' : ''}`}
        onClick={() => setIsOpen(!isOpen)}
      >
        {selectedItem
          ? (
            <div className="flex p-1 items-center gap-2 self-stretch rounded-lg">
              <ApiConnectionMod className='text-text-secondary w-4 h-4' />
              <div className='flex items-center flex-grow'>
                <span className='text-components-input-text-filled text-ellipsis system-sm-regular overflow-hidden'>{selectedItem.name}</span>
              </div>
            </div>
          )
          : (
            <span className='text-components-input-text-placeholder system-sm-regular'>{t('dataset.selectExternalKnowledgeAPI.placeholder')}</span>
          )}
        <RiArrowDownSLine className={`w-4 h-4 text-text-quaternary transition-transform ${isOpen ? 'text-text-secondary' : ''}`} />
      </div>
      {isOpen && (
        <div className="absolute z-10 w-full mt-1 bg-components-panel-bg-blur border rounded-xl shadow-lg">
          {items.map(item => (
            <div
              key={item.value}
              className="flex p-1 items-center cursor-pointer"
              onClick={() => handleSelect(item)}
            >
              <div className="flex p-2 items-center gap-2 self-stretch rounded-lg hover:bg-state-base-hover w-full">
                <ApiConnectionMod className='text-text-secondary w-4 h-4' />
                <span className='text-text-secondary text-ellipsis system-sm-medium overflow-hidden flex-grow'>{item.name}</span>
                <span className='text-text-tertiary overflow-hidden text-right text-ellipsis system-xs-regular'>{item.url}</span>
              </div>
            </div>
          ))}
          <div className='flex p-1 flex-col items-start self-stretch'>
            <div
              className='flex p-2 items-center gap-2 self-stretch rounded-lg cursor-pointer hover:bg-state-base-hover'
              onClick={handleAddNewAPI}
            >
              <RiAddLine className='text-text-secondary w-4 h-4' />
              <span className='flex-grow overflow-hidden text-text-secondary text-ellipsis system-sm-medium'>{t('dataset.createNewExternalAPI')}</span>
            </div>
          </div>
        </div>
      )}
    </div>
  )
}
export default ExternalApiSelect

View File

@ -0,0 +1,96 @@
'use client'
import React, { useEffect, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { RiAddLine } from '@remixicon/react'
import { useRouter } from 'next/navigation'
import ExternalApiSelect from './ExternalApiSelect'
import Input from '@/app/components/base/input'
import Button from '@/app/components/base/button'
import { useModalContext } from '@/context/modal-context'
import { useExternalKnowledgeApi } from '@/context/external-knowledge-api-context'
type ExternalApiSelectionProps = {
external_knowledge_api_id: string
external_knowledge_id: string
onChange: (data: { external_knowledge_api_id?: string; external_knowledge_id?: string }) => void
}
/**
 * Form section for choosing the external knowledge API and entering the
 * external knowledge id.
 *
 * When APIs exist and no API id has been chosen yet, the first API in the list
 * is selected automatically and reported through `onChange`.
 *
 * @param external_knowledge_api_id Currently chosen API id ('' when none).
 * @param external_knowledge_id     Current value of the knowledge id field.
 * @param onChange                  Receives both ids whenever either one changes.
 */
const ExternalApiSelection: React.FC<ExternalApiSelectionProps> = ({ external_knowledge_api_id, external_knowledge_id, onChange }) => {
  const { t } = useTranslation()
  const router = useRouter()
  const { externalKnowledgeApiList, mutateExternalKnowledgeApis } = useExternalKnowledgeApi()
  const [selectedApiId, setSelectedApiId] = useState(external_knowledge_api_id)
  const { setShowExternalKnowledgeAPIModal } = useModalContext()

  // Memoised so the array identity only changes when the API list does. A fresh
  // array on every render would invalidate the effect below and the `items`
  // prop of ExternalApiSelect each time this component renders.
  const apiItems = React.useMemo(() => externalKnowledgeApiList.map(api => ({
    value: api.id,
    name: api.name,
    url: api.settings.endpoint,
  })), [externalKnowledgeApiList])

  // Default to the first API when none is chosen yet. This also covers the
  // initial mount, so no separate mount-only effect is needed (a second one
  // would report the same default twice).
  useEffect(() => {
    if (apiItems.length > 0) {
      const newSelectedId = external_knowledge_api_id || apiItems[0].value
      setSelectedApiId(newSelectedId)
      if (newSelectedId !== external_knowledge_api_id)
        onChange({ external_knowledge_api_id: newSelectedId, external_knowledge_id })
    }
  }, [apiItems, external_knowledge_api_id, external_knowledge_id, onChange])

  // Open the creation modal with an empty payload; the API list is revalidated
  // on both save and cancel.
  const handleAddNewAPI = () => {
    setShowExternalKnowledgeAPIModal({
      payload: { name: '', settings: { endpoint: '', api_key: '' } },
      onSaveCallback: async () => {
        mutateExternalKnowledgeApis()
        router.refresh()
      },
      onCancelCallback: () => {
        mutateExternalKnowledgeApis()
      },
      isEditMode: false,
    })
  }

  return (
    // Values are reported through `onChange` only. Native submission is blocked
    // so that pressing Enter in the id field (presumably a plain text input —
    // confirm against the Input component) cannot reload the page.
    <form className='flex flex-col gap-4 self-stretch' onSubmit={e => e.preventDefault()}>
      <div className='flex flex-col gap-1 self-stretch'>
        <div className='flex flex-col self-stretch'>
          <label className='text-text-secondary system-sm-semibold'>{t('dataset.externalAPIPanelTitle')}</label>
        </div>
        {apiItems.length > 0
          ? <ExternalApiSelect
            items={apiItems}
            value={selectedApiId}
            onSelect={(e) => {
              setSelectedApiId(e.value)
              onChange({ external_knowledge_api_id: e.value, external_knowledge_id })
            }}
          />
          : <Button variant={'tertiary'} onClick={handleAddNewAPI} className='justify-start gap-0.5'>
            <RiAddLine className='w-4 h-4 text-text-tertiary' />
            <span className='text-text-tertiary system-sm-regular'>{t('dataset.noExternalKnowledge')}</span>
          </Button>
        }
      </div>
      <div className='flex flex-col gap-1 self-stretch'>
        <div className='flex flex-col self-stretch'>
          <label className='text-text-secondary system-sm-semibold'>{t('dataset.externalKnowledgeId')}</label>
        </div>
        <Input
          value={external_knowledge_id}
          onChange={e => onChange({ external_knowledge_id: e.target.value, external_knowledge_api_id })}
          placeholder={t('dataset.externalKnowledgeIdPlaceholder') ?? ''}
        />
      </div>
    </form>
  )
}
export default ExternalApiSelection

View File

@ -0,0 +1,33 @@
import { RiBookOpenLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
/**
 * Static side panel introducing external knowledge bases, with links to the
 * external knowledge API documentation and the "connect" guide.
 */
const InfoPanel = () => {
  const { t } = useTranslation()

  // Documentation targets used by the two links below.
  const apiDocUrl = 'https://docs.dify.ai/guides/knowledge-base/external-knowledge-api-documentation'
  const connectGuideUrl = 'https://docs.dify.ai/guides/knowledge-base/connect-external-knowledge'

  const introIcon = (
    <div className='flex p-1 w-10 h-10 justify-center items-center gap-2 flex-grow self-stretch rounded-lg border-[0.5px] border-components-card-border bg-components-card-bg'>
      <RiBookOpenLine className='w-5 h-5 text-text-accent' />
    </div>
  )

  const introText = (
    <p className='flex flex-col items-start gap-2 self-stretch'>
      <span className='self-stretch text-text-secondary system-xl-semibold'>
        {t('dataset.connectDatasetIntro.title')}
      </span>
      <span className='text-text-tertiary system-sm-regular'>
        {t('dataset.connectDatasetIntro.content.front')}
        <a className='text-text-accent system-sm-regular ml-1' href={apiDocUrl} target='_blank' rel="noopener noreferrer">
          {t('dataset.connectDatasetIntro.content.link')}
        </a>
        {t('dataset.connectDatasetIntro.content.end')}
      </span>
      <a className='self-stretch text-text-accent system-sm-regular' href={connectGuideUrl} target='_blank' rel="noopener noreferrer">
        {t('dataset.connectDatasetIntro.learnMore')}
      </a>
    </p>
  )

  return (
    <div className='flex w-[360px] pt-[108px] pb-2 pr-8 flex-col items-start'>
      <div className='flex min-w-[240px] w-full p-6 flex-col items-start gap-3 self-stretch rounded-xl bg-background-section'>
        {introIcon}
        {introText}
      </div>
    </div>
  )
}
export default InfoPanel

View File

@ -0,0 +1,65 @@
import React from 'react'
import { RiBookOpenLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import Input from '@/app/components/base/input'
type KnowledgeBaseInfoProps = {
name: string
description?: string
onChange: (data: { name?: string; description?: string }) => void
}
/**
 * Name and description fields of the external knowledge base form.
 *
 * The component is fully controlled: every edit is reported through `onChange`
 * as a partial object containing only the field that changed.
 *
 * @param name        Current name value.
 * @param description Current description value.
 * @param onChange    Receives `{ name }` or `{ description }` on each edit.
 */
const KnowledgeBaseInfo: React.FC<KnowledgeBaseInfoProps> = ({ name, description, onChange }) => {
  const { t } = useTranslation()

  const handleNameChange = (e: React.ChangeEvent<HTMLInputElement>) => {
    onChange({ name: e.target.value })
  }

  const handleDescriptionChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
    onChange({ description: e.target.value })
  }

  return (
    // Values are reported through `onChange` only. Native submission is blocked
    // so that pressing Enter in the name field (presumably a plain text input —
    // confirm against the Input component) cannot reload the page.
    <form className='flex flex-col gap-4 self-stretch' onSubmit={e => e.preventDefault()}>
      <div className='flex flex-col gap-4 self-stretch'>
        <div className='flex flex-col gap-1 self-stretch'>
          <div className='flex flex-col justify-center self-stretch'>
            <label className='text-text-secondary system-sm-semibold'>{t('dataset.externalKnowledgeName')}</label>
          </div>
          <Input
            value={name}
            onChange={handleNameChange}
            placeholder={t('dataset.externalKnowledgeNamePlaceholder') ?? ''}
          />
        </div>
        <div className='flex flex-col gap-1 self-stretch'>
          <div className='flex flex-col justify-center self-stretch'>
            <label className='text-text-secondary system-sm-semibold'>{t('dataset.externalKnowledgeDescription')}</label>
          </div>
          <div className='flex flex-col gap-1 self-stretch'>
            <textarea
              value={description}
              onChange={handleDescriptionChange}
              placeholder={t('dataset.externalKnowledgeDescriptionPlaceholder') ?? ''}
              className={`flex h-20 py-2 p-3 self-stretch items-start rounded-lg bg-components-input-bg-normal ${description ? 'text-components-input-text-filled' : 'text-components-input-text-placeholder'} system-sm-regular`}
            />
            <a
              className='flex py-0.5 gap-1 self-stretch'
              href='https://docs.dify.ai/features/datasets#how-to-write-a-good-dataset-description'
              target="_blank"
              rel="noopener noreferrer"
            >
              <div className='flex p-0.5 items-center gap-2'>
                <RiBookOpenLine className='w-3 h-3 text-text-tertiary' />
              </div>
              <div className='flex-grow text-text-tertiary body-xs-regular'>{t('dataset.learnHowToWriteGoodKnowledgeDescription')}</div>
            </a>
          </div>
        </div>
      </div>
    </form>
  )
}
export default KnowledgeBaseInfo

View File

@ -0,0 +1,67 @@
import type { FC } from 'react'
import React from 'react'
import { useTranslation } from 'react-i18next'
import TopKItem from '@/app/components/base/param-item/top-k-item'
import ScoreThresholdItem from '@/app/components/base/param-item/score-threshold-item'
import cn from '@/utils/classnames'
type RetrievalSettingsProps = {
topK: number
scoreThreshold: number
scoreThresholdEnabled: boolean
isInHitTesting?: boolean
isInRetrievalSetting?: boolean
onChange: (data: { top_k?: number; score_threshold?: number; score_threshold_enabled?: boolean }) => void
}
/**
 * Top-K and score-threshold controls for external retrieval.
 *
 * Each change is reported through `onChange` as a partial object holding only
 * the setting that changed. `isInHitTesting` and `isInRetrievalSetting` alter
 * the layout and hide the section title.
 */
const RetrievalSettings: FC<RetrievalSettingsProps> = (props) => {
  const {
    topK,
    scoreThreshold,
    scoreThresholdEnabled,
    onChange,
    isInHitTesting = false,
    isInRetrievalSetting = false,
  } = props
  const { t } = useTranslation()

  // Neither embedding flag is set: render the titled, responsive variant.
  const isStandalone = !isInHitTesting && !isInRetrievalSetting

  const wrapperClassName = cn(
    'flex flex-col gap-2 self-stretch',
    isInRetrievalSetting && 'w-full max-w-[480px]',
  )
  const controlsClassName = cn(
    'flex gap-4 self-stretch',
    isInHitTesting && 'flex-col',
    isInRetrievalSetting && 'flex-row',
    isStandalone && 'flex-col sm:flex-row',
  )

  return (
    <div className={wrapperClassName}>
      {isStandalone && (
        <div className='flex h-7 pt-1 flex-col gap-2 self-stretch'>
          <label className='text-text-secondary system-sm-semibold'>{t('dataset.retrievalSettings')}</label>
        </div>
      )}
      <div className={controlsClassName}>
        <div className='flex flex-col gap-1 flex-grow'>
          <TopKItem
            className='grow'
            value={topK}
            onChange={(_key, v) => onChange({ top_k: v })}
            enable
          />
        </div>
        <div className='flex flex-col gap-1 flex-grow'>
          <ScoreThresholdItem
            className='grow'
            value={scoreThreshold}
            onChange={(_key, v) => onChange({ score_threshold: v })}
            enable={scoreThresholdEnabled}
            hasSwitch
            onSwitchChange={(_key, enabled) => onChange({ score_threshold_enabled: enabled })}
          />
        </div>
      </div>
    </div>
  )
}
export default RetrievalSettings

View File

@ -0,0 +1,12 @@
// Form value of the "connect external knowledge base" page; it is the object
// handed to the page's `onConnect` callback.
export type CreateKnowledgeBaseReq = {
// Name shown for the knowledge base; the create form requires it to be non-blank.
name: string
// Optional free-text description.
description?: string
// Id of the external knowledge API chosen in the form.
external_knowledge_api_id: string
// Always the literal 'external' for this request type.
provider: 'external'
// Id entered by the user in the "external knowledge id" field.
external_knowledge_id: string
// Retrieval parameters edited through the retrieval settings controls.
external_retrieval_model: {
top_k: number
score_threshold: number
// Whether the score threshold switch is turned on.
score_threshold_enabled: boolean
}
}

View File

@ -0,0 +1,128 @@
'use client'
import { useCallback, useState } from 'react'
import { useRouter } from 'next/navigation'
import { RiArrowLeftLine, RiArrowRightLine } from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import KnowledgeBaseInfo from './KnowledgeBaseInfo'
import ExternalApiSelection from './ExternalApiSelection'
import RetrievalSettings from './RetrievalSettings'
import InfoPanel from './InfoPanel'
import type { CreateKnowledgeBaseReq } from './declarations'
import Divider from '@/app/components/base/divider'
import Button from '@/app/components/base/button'
type ExternalKnowledgeBaseCreateProps = {
onConnect: (formValue: CreateKnowledgeBaseReq) => void
loading: boolean
}
/**
 * Page body for connecting an external knowledge base.
 *
 * Holds the whole form value in local state, composes the info, API selection
 * and retrieval sections, and hands the form value to `onConnect` when the
 * user presses "connect".
 *
 * @param onConnect Called with the current form value on connect.
 * @param loading   True while the connect request is in flight; the connect
 *                  button shows progress and is disabled meanwhile.
 */
const ExternalKnowledgeBaseCreate: React.FC<ExternalKnowledgeBaseCreateProps> = ({ onConnect, loading }) => {
  const { t } = useTranslation()
  const router = useRouter()
  const [formData, setFormData] = useState<CreateKnowledgeBaseReq>({
    name: '',
    description: '',
    external_knowledge_api_id: '',
    external_knowledge_id: '',
    external_retrieval_model: {
      top_k: 2,
      score_threshold: 0.5,
      score_threshold_enabled: false,
    },
    provider: 'external',
  })

  const navBackHandle = useCallback(() => {
    router.replace('/datasets')
  }, [router])

  // Merge a partial change into the form value. The functional updater always
  // patches the latest state (never a stale render-time copy), and the stable
  // callback identity avoids re-triggering child effects that depend on `onChange`.
  const handleFormChange = useCallback((patch: Partial<CreateKnowledgeBaseReq>) => {
    setFormData(prev => ({ ...prev, ...patch }))
  }, [])

  // Same as above, scoped to the nested retrieval settings.
  const handleRetrievalChange = useCallback((patch: Partial<CreateKnowledgeBaseReq['external_retrieval_model']>) => {
    setFormData(prev => ({
      ...prev,
      external_retrieval_model: {
        ...prev.external_retrieval_model,
        ...patch,
      },
    }))
  }, [])

  const isFormValid = formData.name.trim() !== ''
    && formData.external_knowledge_api_id !== ''
    && formData.external_knowledge_id !== ''
    && formData.external_retrieval_model.top_k !== undefined
    && formData.external_retrieval_model.score_threshold !== undefined

  return (
    <div className='flex flex-col flex-grow self-stretch rounded-t-2xl border-t border-effects-highlight bg-components-panel-bg'>
      <div className='flex justify-center flex-grow self-stretch'>
        <div className='flex w-full max-w-[960px] px-14 py-0 flex-col items-center'>
          <div className='flex w-full max-w-[640px] pt-6 pb-8 flex-col grow items-center gap-4'>
            <div className='relative flex flex-col py-2 items-center gap-[2px] self-stretch'>
              <div className='flex-grow text-text-primary system-xl-semibold self-stretch'>{t('dataset.connectDataset')}</div>
              <p className='text-text-tertiary system-sm-regular'>
                <span>{t('dataset.connectHelper.helper1')}</span>
                <span className='text-text-secondary system-sm-medium'>{t('dataset.connectHelper.helper2')}</span>
                <span>{t('dataset.connectHelper.helper3')}</span>
                <a className='self-stretch text-text-accent system-sm-regular' href='https://docs.dify.ai/guides/knowledge-base/connect-external-knowledge' target='_blank' rel="noopener noreferrer">
                  {t('dataset.connectHelper.helper4')}
                </a>
                <span>{t('dataset.connectHelper.helper5')} </span>
              </p>
              <Button
                className='flex w-8 h-8 p-2 items-center justify-center absolute left-[-44px] top-1 rounded-full'
                variant='tertiary'
                onClick={navBackHandle}
              >
                <RiArrowLeftLine className='w-4 h-4 text-text-tertiary' />
              </Button>
            </div>
            <KnowledgeBaseInfo
              name={formData.name}
              description={formData.description ?? ''}
              onChange={handleFormChange}
            />
            <Divider />
            <ExternalApiSelection
              external_knowledge_api_id={formData.external_knowledge_api_id}
              external_knowledge_id={formData.external_knowledge_id}
              onChange={handleFormChange}
            />
            <RetrievalSettings
              topK={formData.external_retrieval_model.top_k}
              scoreThreshold={formData.external_retrieval_model.score_threshold}
              scoreThresholdEnabled={formData.external_retrieval_model.score_threshold_enabled}
              onChange={handleRetrievalChange}
            />
            <div className='flex py-2 justify-end items-center gap-2 self-stretch'>
              <Button variant='secondary' onClick={navBackHandle}>
                <div className='text-components-button-secondary-text system-sm-medium'>{t('dataset.externalKnowledgeForm.cancel')}</div>
              </Button>
              <Button
                variant='primary'
                onClick={() => {
                  onConnect(formData)
                }}
                // Also disabled while a request is in flight so a second click
                // cannot create the knowledge base twice.
                disabled={!isFormValid || loading}
                loading={loading}
              >
                <div className='text-components-button-primary-text system-sm-medium'>{t('dataset.externalKnowledgeForm.connect')}</div>
                <RiArrowRightLine className='w-4 h-4 text-components-button-primary-text' />
              </Button>
            </div>
          </div>
        </div>
        <InfoPanel />
      </div>
    </div>
  )
}
export default ExternalKnowledgeBaseCreate

View File

@ -26,12 +26,15 @@ const HitDetail: FC<IHitDetailProps> = ({ segInfo }) => {
)
}
return segInfo?.content
return <div className='mb-4 text-md text-gray-800 h-full'>{segInfo?.content}</div>
}
return (
<div className='overflow-x-auto'>
<div className="bg-gray-25 p-6">
segInfo?.id === 'external'
? <div className='w-full overflow-x-auto px-2'>
<div className={s.segModalContent}>{renderContent()}</div>
</div>
: <div className='overflow-x-auto'>
<div className="flex items-center">
<SegmentIndexTag
positionId={segInfo?.position || ''}
@ -59,7 +62,6 @@ const HitDetail: FC<IHitDetailProps> = ({ segInfo }) => {
})}
</div>
</div>
</div>
)
}

View File

@ -13,7 +13,7 @@ import s from './style.module.css'
import HitDetail from './hit-detail'
import ModifyRetrievalModal from './modify-retrieval-modal'
import cn from '@/utils/classnames'
import type { HitTestingResponse, HitTesting as HitTestingType } from '@/models/datasets'
import type { ExternalKnowledgeBaseHitTestingResponse, ExternalKnowledgeBaseHitTesting as ExternalKnowledgeBaseHitTestingType, HitTestingResponse, HitTesting as HitTestingType } from '@/models/datasets'
import Loading from '@/app/components/base/loading'
import Modal from '@/app/components/base/modal'
import Drawer from '@/app/components/base/drawer'
@ -49,8 +49,10 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
const isMobile = media === MediaType.mobile
const [hitResult, setHitResult] = useState<HitTestingResponse | undefined>() // 初始化记录为空数组
const [externalHitResult, setExternalHitResult] = useState<ExternalKnowledgeBaseHitTestingResponse | undefined>()
const [submitLoading, setSubmitLoading] = useState(false)
const [currParagraph, setCurrParagraph] = useState<{ paraInfo?: HitTestingType; showModal: boolean }>({ showModal: false })
const [externalCurrParagraph, setExternalCurrParagraph] = useState<{ paraInfo?: ExternalKnowledgeBaseHitTestingType; showModal: boolean }>({ showModal: false })
const [text, setText] = useState('')
const [currPage, setCurrPage] = React.useState<number>(0)
@ -66,12 +68,52 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
setCurrParagraph({ paraInfo: detail, showModal: true })
}
// Remember the clicked external hit and flag its detail modal as visible.
const onClickExternalCard = (detail: ExternalKnowledgeBaseHitTestingType) => {
  const nextParagraph = { paraInfo: detail, showModal: true }
  setExternalCurrParagraph(nextParagraph)
}
const { dataset: currentDataset } = useContext(DatasetDetailContext)
const isExternal = currentDataset?.provider === 'external'
const [retrievalConfig, setRetrievalConfig] = useState(currentDataset?.retrieval_model_dict as RetrievalConfig)
const [isShowModifyRetrievalModal, setIsShowModifyRetrievalModal] = useState(false)
const [isShowRightPanel, { setTrue: showRightPanel, setFalse: hideRightPanel, set: setShowRightPanel }] = useBoolean(!isMobile)
// Render the hit list title followed by one SegmentCard per record.
// `onClickCard` receives the record whose card was clicked.
const renderHitResults = (results: any[], onClickCard: (record: any) => void) => {
  const cards = results.map((record, idx) => {
    // Source link shown on the card; empty when the record carries no metadata.
    const sourceUri = record.metadata ? record.metadata['x-amz-bedrock-kb-source-uri'] : ''
    return (
      <SegmentCard
        key={idx}
        loading={false}
        refSource={{ title: record.title, uri: sourceUri }}
        isExternal={isExternal}
        detail={record.segment}
        contentExternal={record.content}
        score={record.score}
        scene='hitTesting'
        className='h-[216px] mb-4'
        onClick={() => onClickCard(record)}
      />
    )
  })

  return (
    <>
      <div className='text-gray-600 font-semibold mb-4'>{t('datasetHitTesting.hit.title')}</div>
      <div className='overflow-auto flex-1'>
        <div className={s.cardWrapper}>
          {cards}
        </div>
      </div>
    </>
  )
}
// Placeholder shown when there are no hit records to display.
const renderEmptyState = () => {
  const iconClassName = cn(docStyle.commonIcon, docStyle.targetIcon, '!bg-gray-200 !h-14 !w-14')
  return (
    <div className='h-full flex flex-col justify-center items-center'>
      <div className={iconClassName} />
      <div className='text-gray-300 text-[13px] mt-3'>
        {t('datasetHitTesting.hit.emptyTip')}
      </div>
    </div>
  )
}
useEffect(() => {
setShowRightPanel(!isMobile)
}, [isMobile, setShowRightPanel])
@ -86,12 +128,14 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
<Textarea
datasetId={datasetId}
setHitResult={setHitResult}
setExternalHitResult={setExternalHitResult}
onSubmit={showRightPanel}
onUpdateList={recordsMutate}
loading={submitLoading}
setLoading={setSubmitLoading}
setText={setText}
text={text}
isExternal={isExternal}
onClickRetrievalMethod={() => setIsShowModifyRetrievalModal(true)}
retrievalConfig={retrievalConfig}
isEconomy={currentDataset?.indexing_technique === 'economy'}
@ -159,47 +203,42 @@ const HitTesting: FC<Props> = ({ datasetId }: Props) => {
className='h-[216px]'
/>
</div>
: !hitResult?.records.length
? (
<div className='h-full flex flex-col justify-center items-center'>
<div className={cn(docStyle.commonIcon, docStyle.targetIcon, '!bg-gray-200 !h-14 !w-14')} />
<div className='text-gray-300 text-[13px] mt-3'>
{t('datasetHitTesting.hit.emptyTip')}
</div>
</div>
)
: (
<>
<div className='text-gray-600 font-semibold mb-4'>{t('datasetHitTesting.hit.title')}</div>
<div className='overflow-auto flex-1'>
<div className={s.cardWrapper}>
{hitResult?.records.map((record, idx) => {
return <SegmentCard
key={idx}
loading={false}
detail={record.segment as any}
score={record.score}
scene='hitTesting'
className='h-[216px] mb-4'
onClick={() => onClickCard(record as any)}
/>
})}
</div>
</div>
</>
)
: (
(() => {
if (!hitResult?.records.length && !externalHitResult?.records.length)
return renderEmptyState()
if (hitResult?.records.length)
return renderHitResults(hitResult.records, onClickCard)
return renderHitResults(externalHitResult?.records || [], onClickExternalCard)
})()
)
}
</div>
</FloatRightContainer>
<Modal
className='w-[520px] p-0'
className={isExternal ? 'py-10 px-8' : 'w-full'}
closable
onClose={() => setCurrParagraph({ showModal: false })}
isShow={currParagraph.showModal}
onClose={() => {
setCurrParagraph({ showModal: false })
setExternalCurrParagraph({ showModal: false })
}}
isShow={currParagraph.showModal || externalCurrParagraph.showModal}
>
{currParagraph.showModal && <HitDetail
segInfo={currParagraph.paraInfo?.segment}
/>}
{currParagraph.showModal && (
<HitDetail
segInfo={currParagraph.paraInfo?.segment}
/>
)}
{externalCurrParagraph.showModal && (
<HitDetail
segInfo={{
id: 'external',
content: externalCurrParagraph.paraInfo?.content,
}}
/>
)}
</Modal>
<Drawer isOpen={isShowModifyRetrievalModal} onClose={() => setIsShowModifyRetrievalModal(false)} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>
<ModifyRetrievalModal

View File

@ -0,0 +1,71 @@
import { useState } from 'react'
import {
RiCloseLine,
} from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import RetrievalSettings from '../external-knowledge-base/create/RetrievalSettings'
import Button from '@/app/components/base/button'
import ActionButton from '@/app/components/base/action-button'
type ModifyExternalRetrievalModalProps = {
onClose: () => void
onSave: (data: { top_k: number; score_threshold: number; score_threshold_enabled: boolean }) => void
initialTopK: number
initialScoreThreshold: number
initialScoreThresholdEnabled: boolean
}
/**
 * Floating panel for editing external retrieval settings.
 *
 * Edits are kept in local state seeded from the `initial*` props; "save"
 * passes the edited values to `onSave` and then calls `onClose`, while
 * "cancel" and the close icon only call `onClose`.
 */
const ModifyExternalRetrievalModal: React.FC<ModifyExternalRetrievalModalProps> = (props) => {
  const { onClose, onSave, initialTopK, initialScoreThreshold, initialScoreThresholdEnabled } = props
  const { t } = useTranslation()
  const [topK, setTopK] = useState(initialTopK)
  const [scoreThreshold, setScoreThreshold] = useState(initialScoreThreshold)
  const [scoreThresholdEnabled, setScoreThresholdEnabled] = useState(initialScoreThresholdEnabled)

  // Apply only the settings present in the partial change object.
  const handleSettingsChange = (patch: { top_k?: number; score_threshold?: number; score_threshold_enabled?: boolean }) => {
    const { top_k, score_threshold, score_threshold_enabled } = patch
    if (top_k !== undefined)
      setTopK(top_k)
    if (score_threshold !== undefined)
      setScoreThreshold(score_threshold)
    if (score_threshold_enabled !== undefined)
      setScoreThresholdEnabled(score_threshold_enabled)
  }

  const handleSave = () => {
    const settings = {
      top_k: topK,
      score_threshold: scoreThreshold,
      score_threshold_enabled: scoreThresholdEnabled,
    }
    onSave(settings)
    onClose()
  }

  const header = (
    <div className='flex p-4 pb-2 items-center justify-between self-stretch'>
      <div className='text-text-primary system-xl-semibold flex-grow'>{t('datasetHitTesting.settingTitle')}</div>
      <ActionButton className='ml-auto' onClick={onClose}>
        <RiCloseLine className='w-4 h-4 flex-shrink-0' />
      </ActionButton>
    </div>
  )

  const footer = (
    <div className='flex p-4 pt-2 justify-end items-end gap-1 w-full'>
      <Button className='flex-shrink-0 min-w-[72px]' onClick={onClose}>{t('common.operation.cancel')}</Button>
      <Button variant='primary' className='flex-shrink-0 min-w-[72px]' onClick={handleSave}>{t('common.operation.save')}</Button>
    </div>
  )

  return (
    <div className='absolute z-10 top-[36px] right-[14px] flex w-[320px] flex-col items-start rounded-2xl border-[0.5px]
border-components-panel-border bg-components-panel-bg shadows-shadow-2xl'
    >
      {header}
      <div className='flex p-4 pt-2 flex-col justify-center items-start gap-4 self-stretch'>
        <RetrievalSettings
          topK={topK}
          scoreThreshold={scoreThreshold}
          scoreThresholdEnabled={scoreThresholdEnabled}
          onChange={handleSettingsChange}
          isInHitTesting
        />
      </div>
      {footer}
    </div>
  )
}
export default ModifyExternalRetrievalModal

View File

@ -1,12 +1,17 @@
import React, { useState } from 'react'
import { useTranslation } from 'react-i18next'
import {
RiEqualizer2Line,
} from '@remixicon/react'
import Button from '../../base/button'
import Tag from '../../base/tag'
import { getIcon } from '../common/retrieval-method-info'
import s from './style.module.css'
import ModifyExternalRetrievalModal from './modify-external-retrieval-modal'
import Tooltip from '@/app/components/base/tooltip'
import cn from '@/utils/classnames'
import type { HitTestingResponse } from '@/models/datasets'
import { hitTesting } from '@/service/datasets'
import type { ExternalKnowledgeBaseHitTestingResponse, HitTestingResponse } from '@/models/datasets'
import { externalKnowledgeBaseHitTesting, hitTesting } from '@/service/datasets'
import { asyncRunSafe } from '@/utils'
import { RETRIEVE_METHOD, type RetrievalConfig } from '@/types/app'
@ -14,10 +19,12 @@ type TextAreaWithButtonIProps = {
datasetId: string
onUpdateList: () => void
setHitResult: (res: HitTestingResponse) => void
setExternalHitResult: (res: ExternalKnowledgeBaseHitTestingResponse) => void
loading: boolean
setLoading: (v: boolean) => void
text: string
setText: (v: string) => void
isExternal?: boolean
onClickRetrievalMethod: () => void
retrievalConfig: RetrievalConfig
isEconomy: boolean
@ -28,16 +35,29 @@ const TextAreaWithButton = ({
datasetId,
onUpdateList,
setHitResult,
setExternalHitResult,
setLoading,
loading,
text,
setText,
isExternal = false,
onClickRetrievalMethod,
retrievalConfig,
isEconomy,
onSubmit: _onSubmit,
}: TextAreaWithButtonIProps) => {
const { t } = useTranslation()
const [isSettingsOpen, setIsSettingsOpen] = useState(false)
const [externalRetrievalSettings, setExternalRetrievalSettings] = useState({
top_k: 2,
score_threshold: 0.5,
score_threshold_enabled: false,
})
// Store the settings chosen in the modal and close the settings panel.
const handleSaveExternalRetrievalSettings = (settings: { top_k: number; score_threshold: number; score_threshold_enabled: boolean }) => {
  setIsSettingsOpen(false)
  setExternalRetrievalSettings(settings)
}
function handleTextChange(event: any) {
setText(event.target.value)
@ -63,28 +83,70 @@ const TextAreaWithButton = ({
_onSubmit && _onSubmit()
}
// Run a hit test against the external knowledge base using the current query
// text and the locally stored external retrieval settings.
const externalRetrievalTestingOnSubmit = async () => {
  // Flag the request as in flight; without this the trailing setLoading(false)
  // is a no-op and the submit button never shows its loading state.
  setLoading(true)
  const [e, res] = await asyncRunSafe<ExternalKnowledgeBaseHitTestingResponse>(
    externalKnowledgeBaseHitTesting({
      datasetId,
      query: text,
      external_retrieval_model: {
        top_k: externalRetrievalSettings.top_k,
        score_threshold: externalRetrievalSettings.score_threshold,
        score_threshold_enabled: externalRetrievalSettings.score_threshold_enabled,
      },
    }) as Promise<ExternalKnowledgeBaseHitTestingResponse>,
  )
  if (!e) {
    setExternalHitResult(res)
    onUpdateList?.()
  }
  setLoading(false)
  // Notify the parent like the regular submit handler does, so the optional
  // `onSubmit` callback also fires for external datasets.
  _onSubmit && _onSubmit()
}
const retrievalMethod = isEconomy ? RETRIEVE_METHOD.invertedIndex : retrievalConfig.search_method
const Icon = getIcon(retrievalMethod)
return (
<>
<div className={s.wrapper}>
<div className='pt-2 rounded-tl-xl rounded-tr-xl bg-[#EEF4FF]'>
<div className='relative pt-2 rounded-tl-xl rounded-tr-xl bg-[#EEF4FF]'>
<div className="px-4 pb-2 flex justify-between h-8 items-center">
<span className="text-gray-800 font-semibold text-sm">
{t('datasetHitTesting.input.title')}
</span>
<Tooltip
popupContent={t('dataset.retrieval.changeRetrievalMethod')}
>
<div
onClick={onClickRetrievalMethod}
className='flex px-2 h-7 items-center space-x-1 bg-white hover:bg-[#ECE9FE] rounded-md shadow-sm cursor-pointer text-[#6927DA]'
{isExternal
? <Button
variant='secondary'
size='small'
onClick={() => setIsSettingsOpen(!isSettingsOpen)}
>
<Icon className='w-3.5 h-3.5'></Icon>
<div className='text-xs font-medium'>{t(`dataset.retrieval.${retrievalMethod}.title`)}</div>
</div>
</Tooltip>
<RiEqualizer2Line className='text-components-button-secondary-text w-3.5 h-3.5' />
<div className='flex px-[3px] justify-center items-center gap-1'>
<span className='text-components-button-secondary-text system-xs-medium'>{t('datasetHitTesting.settingTitle')}</span>
</div>
</Button>
: <Tooltip
popupContent={t('dataset.retrieval.changeRetrievalMethod')}
>
<div
onClick={onClickRetrievalMethod}
className='flex px-2 h-7 items-center space-x-1 bg-white hover:bg-[#ECE9FE] rounded-md shadow-sm cursor-pointer text-[#6927DA]'
>
<Icon className='w-3.5 h-3.5'></Icon>
<div className='text-xs font-medium'>{t(`dataset.retrieval.${retrievalMethod}.title`)}</div>
</div>
</Tooltip>
}
</div>
{
isSettingsOpen && (
<ModifyExternalRetrievalModal
onClose={() => setIsSettingsOpen(false)}
onSave={handleSaveExternalRetrievalSettings}
initialTopK={externalRetrievalSettings.top_k}
initialScoreThreshold={externalRetrievalSettings.score_threshold}
initialScoreThresholdEnabled={externalRetrievalSettings.score_threshold_enabled}
/>
)
}
<div className='h-2 rounded-tl-xl rounded-tr-xl bg-white'></div>
</div>
<div className='px-4 pb-11'>
@ -122,7 +184,7 @@ const TextAreaWithButton = ({
<div>
<Button
onClick={onSubmit}
onClick={isExternal ? externalRetrievalTestingOnSubmit : onSubmit}
variant="primary"
loading={loading}
disabled={(!text?.length || text?.length > 200)}
@ -132,7 +194,6 @@ const TextAreaWithButton = ({
</div>
</div>
</div>
</div>
</>
)

View File

@ -26,6 +26,8 @@ const RenameDatasetModal = ({ show, dataset, onSuccess, onClose }: RenameDataset
const [loading, setLoading] = useState(false)
const [name, setName] = useState<string>(dataset.name)
const [description, setDescription] = useState<string>(dataset.description)
const [externalKnowledgeId, setExternalKnowledgeId] = useState<string>(dataset.external_knowledge_info.external_knowledge_id)
const [externalKnowledgeApiId, setExternalKnowledgeApiId] = useState<string>(dataset.external_knowledge_info.external_knowledge_api_id)
const onConfirm: MouseEventHandler = async () => {
if (!name.trim()) {
@ -34,12 +36,17 @@ const RenameDatasetModal = ({ show, dataset, onSuccess, onClose }: RenameDataset
}
try {
setLoading(true)
const body: Partial<DataSet> & { external_knowledge_id?: string; external_knowledge_api_id?: string } = {
name,
description,
}
if (externalKnowledgeId && externalKnowledgeApiId) {
body.external_knowledge_id = externalKnowledgeId
body.external_knowledge_api_id = externalKnowledgeApiId
}
await updateDatasetSetting({
datasetId: dataset.id,
body: {
name,
description,
},
body,
})
notify({ type: 'success', message: t('common.actionMsg.modifiedSuccessfully') })
if (onSuccess)

View File

@ -8,11 +8,14 @@ import { useSWRConfig } from 'swr'
import { unstable_serialize } from 'swr/infinite'
import PermissionSelector from '../permission-selector'
import IndexMethodRadio from '../index-method-radio'
import RetrievalSettings from '../../external-knowledge-base/create/RetrievalSettings'
import cn from '@/utils/classnames'
import RetrievalMethodConfig from '@/app/components/datasets/common/retrieval-method-config'
import EconomicalRetrievalMethodConfig from '@/app/components/datasets/common/economical-retrieval-method-config'
import { ToastContext } from '@/app/components/base/toast'
import Button from '@/app/components/base/button'
import Divider from '@/app/components/base/divider'
import { ApiConnectionMod } from '@/app/components/base/icons/src/vender/solid/development'
import { updateDatasetSetting } from '@/service/datasets'
import type { DataSetListResponse } from '@/models/datasets'
import DatasetDetailContext from '@/context/dataset-detail'
@ -55,6 +58,9 @@ const Form = () => {
const [name, setName] = useState(currentDataset?.name ?? '')
const [description, setDescription] = useState(currentDataset?.description ?? '')
const [permission, setPermission] = useState(currentDataset?.permission)
const [topK, setTopK] = useState(currentDataset?.external_retrieval_model.top_k ?? 2)
const [scoreThreshold, setScoreThreshold] = useState(currentDataset?.external_retrieval_model.score_threshold ?? 0.5)
const [scoreThresholdEnabled, setScoreThresholdEnabled] = useState(currentDataset?.external_retrieval_model.score_threshold_enabled ?? false)
const [selectedMemberIDs, setSelectedMemberIDs] = useState<string[]>(currentDataset?.partial_member_list || [])
const [memberList, setMemberList] = useState<Member[]>([])
const [indexMethod, setIndexMethod] = useState(currentDataset?.indexing_technique)
@ -85,6 +91,15 @@ const Form = () => {
setMemberList(accounts)
}
// Applies a partial retrieval-settings update: each field that is present
// (i.e. not `undefined`) is written to its matching piece of local state;
// omitted fields leave the current state untouched.
const handleSettingsChange = (data: { top_k?: number; score_threshold?: number; score_threshold_enabled?: boolean }) => {
  const { top_k, score_threshold, score_threshold_enabled } = data

  if (top_k !== undefined)
    setTopK(top_k)

  if (score_threshold !== undefined)
    setScoreThreshold(score_threshold)

  // Compared against `undefined` (not truthiness) so an explicit `false` is still applied.
  if (score_threshold_enabled !== undefined)
    setScoreThresholdEnabled(score_threshold_enabled)
}
useMount(() => {
getMembers()
})
@ -126,10 +141,17 @@ const Form = () => {
description,
permission,
indexing_technique: indexMethod,
external_retrieval_model: {
top_k: topK,
score_threshold: scoreThreshold,
score_threshold_enabled: scoreThresholdEnabled,
},
retrieval_model: {
...postRetrievalConfig,
score_threshold: postRetrievalConfig.score_threshold_enabled ? postRetrievalConfig.score_threshold : 0,
},
external_knowledge_id: currentDataset!.external_knowledge_info.external_knowledge_id,
external_knowledge_api_id: currentDataset!.external_knowledge_info.external_knowledge_api_id,
embedding_model: embeddingModel.model,
embedding_model_provider: embeddingModel.provider,
},
@ -161,7 +183,7 @@ const Form = () => {
<div className='w-full sm:w-[800px] p-4 sm:px-16 sm:py-6'>
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.name')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.name')}</div>
</div>
<div className='w-full max-w-[480px]'>
<input
@ -174,7 +196,7 @@ const Form = () => {
</div>
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.desc')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.desc')}</div>
</div>
<div className='w-full max-w-[480px]'>
<textarea
@ -192,7 +214,7 @@ const Form = () => {
</div>
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.permissions')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.permissions')}</div>
</div>
<div className='w-full sm:w-[480px]'>
<PermissionSelector
@ -210,7 +232,7 @@ const Form = () => {
<div className='w-full h-0 border-b-[0.5px] border-b-gray-200 my-2' />
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.indexMethod')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.indexMethod')}</div>
</div>
<div className='w-full sm:w-[480px]'>
<IndexMethodRadio
@ -225,7 +247,7 @@ const Form = () => {
{indexMethod === 'high_quality' && (
<div className={rowClass}>
<div className={labelClass}>
<div>{t('datasetSettings.form.embeddingModel')}</div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.embeddingModel')}</div>
</div>
<div className='w-[480px]'>
<ModelSelector
@ -240,32 +262,76 @@ const Form = () => {
</div>
)}
{/* Retrieval Method Config */}
<div className={rowClass}>
<div className={labelClass}>
<div>
<div>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.description')}
{currentDataset?.provider === 'external'
? <>
<div className={rowClass}><Divider/></div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
</div>
<RetrievalSettings
topK={topK}
scoreThreshold={scoreThreshold}
scoreThresholdEnabled={scoreThresholdEnabled}
onChange={handleSettingsChange}
isInRetrievalSetting={true}
/>
</div>
<div className={rowClass}><Divider/></div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.externalKnowledgeAPI')}</div>
</div>
<div className='w-full max-w-[480px]'>
<div className='flex h-full px-3 py-2 items-center gap-1 rounded-lg bg-components-input-bg-normal'>
<ApiConnectionMod className='w-4 h-4 text-text-secondary' />
<div className='overflow-hidden text-text-secondary text-ellipsis system-sm-medium'>
{currentDataset?.external_knowledge_info.external_knowledge_api_name}
</div>
<div className='text-text-tertiary system-xs-regular'>·</div>
<div className='text-text-tertiary system-xs-regular'>{currentDataset?.external_knowledge_info.external_knowledge_api_endpoint}</div>
</div>
</div>
</div>
<div className={rowClass}>
<div className={labelClass}>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.externalKnowledgeID')}</div>
</div>
<div className='w-full max-w-[480px]'>
<div className='flex h-full px-3 py-2 items-center gap-1 rounded-lg bg-components-input-bg-normal'>
<div className='text-text-tertiary system-xs-regular'>{currentDataset?.external_knowledge_info.external_knowledge_id}</div>
</div>
</div>
</div>
<div className={rowClass}><Divider/></div>
</>
: <div className={rowClass}>
<div className={labelClass}>
<div>
<div className='text-text-secondary system-sm-semibold'>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='leading-[18px] text-xs font-normal text-gray-500'>
<a target='_blank' rel='noopener noreferrer' href='https://docs.dify.ai/guides/knowledge-base/create-knowledge-and-upload-documents#id-4-retrieval-settings' className='text-[#155eef]'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.description')}
</div>
</div>
</div>
<div className='w-[480px]'>
{indexMethod === 'high_quality'
? (
<RetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)
: (
<EconomicalRetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)}
</div>
</div>
<div className='w-[480px]'>
{indexMethod === 'high_quality'
? (
<RetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)
: (
<EconomicalRetrievalMethodConfig
value={retrievalConfig}
onChange={setRetrievalConfig}
/>
)}
</div>
</div>
}
<div className={rowClass}>
<div className={labelClass} />
<div className='w-[480px]'>

View File

@ -51,7 +51,7 @@ const DatasetNav = () => {
navs={datasetItems.map(dataset => ({
id: dataset.id,
name: dataset.name,
link: `/datasets/${dataset.id}/documents`,
link: dataset.provider === 'external' ? `/datasets/${dataset.id}/hitTesting` : `/datasets/${dataset.id}/documents`,
icon: dataset.icon,
icon_background: dataset.icon_background,
})) as NavItem[]}

View File

@ -1,13 +1,15 @@
'use client'
import type { FC } from 'react'
import React, { useCallback } from 'react'
import React, { useCallback, useState } from 'react'
import { useBoolean } from 'ahooks'
import {
RiDeleteBinLine,
RiEditLine,
} from '@remixicon/react'
import { useTranslation } from 'react-i18next'
import type { DataSet } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
import ActionButton, { ActionButtonState } from '@/app/components/base/action-button'
import FileIcon from '@/app/components/base/file-icon'
import { Folder } from '@/app/components/base/icons/src/vender/solid/files'
import SettingsModal from '@/app/components/app/configuration/dataset-config/settings-modal'
@ -30,8 +32,10 @@ const DatasetItem: FC<Props> = ({
readonly,
}) => {
const media = useBreakpoints()
const { t } = useTranslation()
const isMobile = media === MediaType.mobile
const { formatIndexingTechniqueAndMethod } = useKnowledge()
const [isDeleteHovered, setIsDeleteHovered] = useState(false)
const [isShowSettingsModal, {
setTrue: showSettingsModal,
@ -43,8 +47,18 @@ const DatasetItem: FC<Props> = ({
hideSettingsModal()
}, [hideSettingsModal, onChange])
// Click handler for the delete action: stops the event from propagating to
// ancestor handlers, then delegates to the `onRemove` prop.
const handleRemove = useCallback((event: React.MouseEvent) => {
  event.stopPropagation()
  onRemove()
}, [onRemove])
return (
<div className='flex items-center h-10 justify-between rounded-xl px-2 bg-white border border-gray-200 cursor-pointer group/dataset-item'>
<div className={`flex items-center h-10 justify-between rounded-xl px-2 border-[0.5px]
border-components-panel-border-subtle cursor-pointer group/dataset-item
${isDeleteHovered
? 'bg-state-destructive-hover border-state-destructive-border'
: 'bg-components-panel-on-panel-item-bg hover:bg-components-panel-on-panel-item-bg-hover'
}`}>
<div className='w-0 grow flex items-center space-x-1.5'>
{
payload.data_source_type === DataSourceType.NOTION
@ -61,24 +75,36 @@ const DatasetItem: FC<Props> = ({
</div>
{!readonly && (
<div className='hidden group-hover/dataset-item:flex shrink-0 ml-2 items-center space-x-1'>
<div
className='flex items-center justify-center w-6 h-6 hover:bg-black/5 rounded-md cursor-pointer'
onClick={showSettingsModal}
<ActionButton
onClick={(e) => {
e.stopPropagation()
showSettingsModal()
}}
>
<RiEditLine className='w-4 h-4 text-gray-500' />
</div>
<div
className='flex items-center justify-center w-6 h-6 hover:bg-black/5 rounded-md cursor-pointer'
onClick={onRemove}
<RiEditLine className='w-4 h-4 flex-shrink-0 text-text-tertiary' />
</ActionButton>
<ActionButton
onClick={handleRemove}
state={ActionButtonState.Destructive}
onMouseEnter={() => setIsDeleteHovered(true)}
onMouseLeave={() => setIsDeleteHovered(false)}
>
<RiDeleteBinLine className='w-4 h-4 text-gray-500' />
</div>
<RiDeleteBinLine className={`w-4 h-4 flex-shrink-0 ${isDeleteHovered ? 'text-text-destructive' : 'text-text-tertiary'}`} />
</ActionButton>
</div>
)}
<Badge
className='group-hover/dataset-item:hidden shrink-0'
text={formatIndexingTechniqueAndMethod(payload.indexing_technique, payload.retrieval_model_dict?.search_method)}
/>
{
payload.indexing_technique && <Badge
className='group-hover/dataset-item:hidden shrink-0'
text={formatIndexingTechniqueAndMethod(payload.indexing_technique, payload.retrieval_model_dict?.search_method)}
/>
}
{
payload.provider === 'external' && <Badge
className='group-hover/dataset-item:hidden shrink-0'
text={t('dataset.externalTag')}
/>
}
{isShowSettingsModal && (
<Drawer isOpen={isShowSettingsModal} onClose={hideSettingsModal} footer={null} mask={isMobile} panelClassname='mt-16 mx-2 sm:mr-2 mb-3 !p-0 !max-w-[640px] rounded-xl'>

View File

@ -207,9 +207,11 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
const handleOnDatasetsChange = useCallback((newDatasets: DataSet[]) => {
const {
allEconomic,
mixtureHighQualityAndEconomic,
mixtureInternalAndExternal,
inconsistentEmbeddingModel,
allInternal,
allExternal,
} = getSelectedDatasetsMode(newDatasets)
const newInputs = produce(inputs, (draft) => {
draft.dataset_ids = newDatasets.map(d => d.id)
@ -222,7 +224,11 @@ const useConfig = (id: string, payload: KnowledgeRetrievalNodeType) => {
setInputs(newInputs)
setSelectedDatasets(newDatasets)
if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel)
if (
(allInternal && (mixtureHighQualityAndEconomic || inconsistentEmbeddingModel))
|| mixtureInternalAndExternal
|| (allExternal && newDatasets.length > 1)
)
setRerankModelOpen(true)
}, [inputs, setInputs, payload.retrieval_mode])

View File

@ -21,6 +21,9 @@ export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
let allHighQualityFullTextSearch = true
let allEconomic = true
let mixtureHighQualityAndEconomic = true
let allExternal = true
let allInternal = true
let mixtureInternalAndExternal = true
let inconsistentEmbeddingModel = false
if (!datasets.length) {
allHighQuality = false
@ -29,6 +32,9 @@ export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
allEconomic = false
mixtureHighQualityAndEconomic = false
inconsistentEmbeddingModel = false
allExternal = false
allInternal = false
mixtureInternalAndExternal = false
}
datasets.forEach((dataset) => {
if (dataset.indexing_technique === 'economy') {
@ -45,8 +51,21 @@ export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
if (dataset.retrieval_model_dict.search_method !== RETRIEVE_METHOD.fullText)
allHighQualityFullTextSearch = false
}
if (dataset.provider !== 'external') {
allExternal = false
}
else {
allInternal = false
allHighQuality = false
allHighQualityVectorSearch = false
allHighQualityFullTextSearch = false
mixtureHighQualityAndEconomic = false
}
})
if (allExternal || allInternal)
mixtureInternalAndExternal = false
if (allHighQuality || allEconomic)
mixtureHighQualityAndEconomic = false
@ -59,6 +78,9 @@ export const getSelectedDatasetsMode = (datasets: DataSet[]) => {
allHighQualityFullTextSearch,
allEconomic,
mixtureHighQualityAndEconomic,
allInternal,
allExternal,
mixtureInternalAndExternal,
inconsistentEmbeddingModel,
} as SelectedDatasetsMode
}
@ -70,6 +92,9 @@ export const getMultipleRetrievalConfig = (multipleRetrievalConfig: MultipleRetr
allHighQualityFullTextSearch,
allEconomic,
mixtureHighQualityAndEconomic,
allInternal,
allExternal,
mixtureInternalAndExternal,
inconsistentEmbeddingModel,
} = getSelectedDatasetsMode(selectedDatasets)
@ -91,13 +116,13 @@ export const getMultipleRetrievalConfig = (multipleRetrievalConfig: MultipleRetr
reranking_enable: allEconomic ? reranking_enable : true,
}
if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel)
if (allEconomic || mixtureHighQualityAndEconomic || inconsistentEmbeddingModel || allExternal || mixtureInternalAndExternal)
result.reranking_mode = RerankingModeEnum.RerankingModel
if (allHighQuality && !inconsistentEmbeddingModel && reranking_mode === undefined)
if (allHighQuality && !inconsistentEmbeddingModel && reranking_mode === undefined && allInternal)
result.reranking_mode = RerankingModeEnum.WeightedScore
if (allHighQuality && !inconsistentEmbeddingModel && (reranking_mode === RerankingModeEnum.WeightedScore || reranking_mode === undefined) && !weights) {
if (allHighQuality && !inconsistentEmbeddingModel && (reranking_mode === RerankingModeEnum.WeightedScore || reranking_mode === undefined) && allInternal && !weights) {
result.weights = {
vector_setting: {
vector_weight: allHighQualityVectorSearch

View File

@ -0,0 +1,28 @@
'use client'
import React, { createContext, useContext, useState } from 'react'
// Shape of the value exposed through the external API panel context.
type ExternalApiPanelContextType = {
// Boolean flag describing the panel's visibility.
showExternalApiPanel: boolean
// Callback that receives the new value for the flag.
setShowExternalApiPanel: (show: boolean) => void
}
// Context object for the panel state. The default is `undefined`, so a read with no provider above it yields `undefined`.
const ExternalApiPanelContext = createContext<ExternalApiPanelContextType | undefined>(undefined)
/**
 * Provides the external API panel visibility state to its subtree.
 *
 * The flag starts as `false`. The context value is memoized so that consumers
 * only re-render when the flag actually changes, not every time this
 * provider's parent re-renders (an inline object literal would have a new
 * identity on each render).
 */
export const ExternalApiPanelProvider: React.FC<{ children: React.ReactNode }> = ({ children }) => {
  const [showExternalApiPanel, setShowExternalApiPanel] = useState(false)

  // The state setter returned by `useState` has a stable identity, so the
  // flag is the only value that can invalidate the memoized object.
  const contextValue = React.useMemo<ExternalApiPanelContextType>(
    () => ({ showExternalApiPanel, setShowExternalApiPanel }),
    [showExternalApiPanel],
  )

  return (
    <ExternalApiPanelContext.Provider value={contextValue}>
      {children}
    </ExternalApiPanelContext.Provider>
  )
}
/**
 * Reads the external API panel context.
 *
 * @returns the current context value (`showExternalApiPanel` and its setter).
 * @throws Error when the context value is `undefined`, i.e. no
 *   `ExternalApiPanelProvider` is mounted above the caller.
 */
export const useExternalApiPanel = () => {
  const panelContext = useContext(ExternalApiPanelContext)

  if (panelContext === undefined)
    throw new Error('useExternalApiPanel must be used within an ExternalApiPanelProvider')

  return panelContext
}

View File

@ -0,0 +1,46 @@
'use client'
import { createContext, useContext, useMemo } from 'react'
import type { FC, ReactNode } from 'react'
import useSWR from 'swr'
import type { ExternalAPIItem, ExternalAPIListResponse } from '@/models/datasets'
import { fetchExternalAPIList } from '@/service/datasets'
// Shape of the value exposed through the external knowledge API context.
type ExternalKnowledgeApiContextType = {
// List of external API items; the provider falls back to an empty array when no response data is available.
externalKnowledgeApiList: ExternalAPIItem[]
// Function resolving to the list response or `undefined`; the provider supplies SWR's `mutate` here.
mutateExternalKnowledgeApis: () => Promise<ExternalAPIListResponse | undefined>
// Loading flag; the provider passes through SWR's `isLoading`.
isLoading: boolean
}
// Context object for the external knowledge API list. The default is `undefined`, so a read with no provider above it yields `undefined`.
const ExternalKnowledgeApiContext = createContext<ExternalKnowledgeApiContextType | undefined>(undefined)
// Props accepted by `ExternalKnowledgeApiProvider`.
export type ExternalKnowledgeApiProviderProps = {
// Subtree rendered inside the provider.
children: ReactNode
}
/**
 * Fetches the external knowledge API list with SWR and shares it, together
 * with the SWR `mutate` function and loading flag, with its subtree.
 */
export const ExternalKnowledgeApiProvider: FC<ExternalKnowledgeApiProviderProps> = ({ children }) => {
  const {
    data: listResponse,
    mutate: mutateExternalKnowledgeApis,
    isLoading,
  } = useSWR<ExternalAPIListResponse>(
    { url: '/datasets/external-knowledge-api' },
    fetchExternalAPIList,
  )

  // Memoized so the context value keeps its identity until one of the
  // SWR-provided inputs changes.
  const providedValue = useMemo<ExternalKnowledgeApiContextType>(() => {
    return {
      // Fall back to an empty list while there is no response data.
      externalKnowledgeApiList: listResponse?.data || [],
      mutateExternalKnowledgeApis,
      isLoading,
    }
  }, [listResponse, mutateExternalKnowledgeApis, isLoading])

  return (
    <ExternalKnowledgeApiContext.Provider value={providedValue}>
      {children}
    </ExternalKnowledgeApiContext.Provider>
  )
}
/**
 * Reads the external knowledge API context.
 *
 * @returns the current context value (API list, mutate function, loading flag).
 * @throws Error when the context value is `undefined`, i.e. no
 *   `ExternalKnowledgeApiProvider` is mounted above the caller.
 */
export const useExternalKnowledgeApi = () => {
  const apiContext = useContext(ExternalKnowledgeApiContext)

  if (apiContext === undefined)
    throw new Error('useExternalKnowledgeApi must be used within a ExternalKnowledgeApiProvider')

  return apiContext
}

View File

@ -10,6 +10,7 @@ import ModerationSettingModal from '@/app/components/app/configuration/toolbox/m
import ExternalDataToolModal from '@/app/components/app/configuration/tools/external-data-tool-modal'
import AnnotationFullModal from '@/app/components/billing/annotation-full/modal'
import ModelModal from '@/app/components/header/account-setting/model-provider-page/model-modal'
import ExternalAPIModal from '@/app/components/datasets/external-api/external-api-modal'
import type {
ConfigurationMethodEnum,
CustomConfigurationModelFixedFields,
@ -23,6 +24,7 @@ import type {
ApiBasedExtension,
ExternalDataTool,
} from '@/models/common'
import type { CreateExternalAPIReq } from '@/app/components/datasets/external-api/declarations'
import ModelLoadBalancingEntryModal from '@/app/components/header/account-setting/model-provider-page/model-modal/model-load-balancing-entry-modal'
import type { ModelLoadBalancingModalProps } from '@/app/components/header/account-setting/model-provider-page/provider-added-card/model-load-balancing-modal'
import ModelLoadBalancingModal from '@/app/components/header/account-setting/model-provider-page/provider-added-card/model-load-balancing-modal'
@ -32,7 +34,10 @@ export type ModalState<T> = {
onCancelCallback?: () => void
onSaveCallback?: (newPayload: T) => void
onRemoveCallback?: (newPayload: T) => void
onEditCallback?: (newPayload: T) => void
onValidateBeforeSaveCallback?: (newPayload: T) => boolean
isEditMode?: boolean
datasetBindings?: { id: string; name: string }[]
}
export type ModelModalType = {
@ -52,6 +57,7 @@ export type ModalContextState = {
setShowPricingModal: () => void
setShowAnnotationFullModal: () => void
setShowModelModal: Dispatch<SetStateAction<ModalState<ModelModalType> | null>>
setShowExternalKnowledgeAPIModal: Dispatch<SetStateAction<ModalState<CreateExternalAPIReq> | null>>
setShowModelLoadBalancingModal: Dispatch<SetStateAction<ModelLoadBalancingModalProps | null>>
setShowModelLoadBalancingEntryModal: Dispatch<SetStateAction<ModalState<LoadBalancingEntryModalType> | null>>
}
@ -63,6 +69,7 @@ const ModalContext = createContext<ModalContextState>({
setShowPricingModal: () => { },
setShowAnnotationFullModal: () => { },
setShowModelModal: () => { },
setShowExternalKnowledgeAPIModal: () => { },
setShowModelLoadBalancingModal: () => { },
setShowModelLoadBalancingEntryModal: () => { },
})
@ -86,6 +93,7 @@ export const ModalContextProvider = ({
const [showModerationSettingModal, setShowModerationSettingModal] = useState<ModalState<ModerationConfig> | null>(null)
const [showExternalDataToolModal, setShowExternalDataToolModal] = useState<ModalState<ExternalDataTool> | null>(null)
const [showModelModal, setShowModelModal] = useState<ModalState<ModelModalType> | null>(null)
const [showExternalKnowledgeAPIModal, setShowExternalKnowledgeAPIModal] = useState<ModalState<CreateExternalAPIReq> | null>(null)
const [showModelLoadBalancingModal, setShowModelLoadBalancingModal] = useState<ModelLoadBalancingModalProps | null>(null)
const [showModelLoadBalancingEntryModal, setShowModelLoadBalancingEntryModal] = useState<ModalState<LoadBalancingEntryModalType> | null>(null)
const searchParams = useSearchParams()
@ -122,6 +130,24 @@ export const ModalContextProvider = ({
setShowModelModal(null)
}, [showModelModal])
// Cancel handler for the External Knowledge API modal: clears the modal state
// first, then invokes the opener's `onCancelCallback` when one was supplied.
const handleCancelExternalApiModal = useCallback(() => {
  setShowExternalKnowledgeAPIModal(null)
  showExternalKnowledgeAPIModal?.onCancelCallback?.()
}, [showExternalKnowledgeAPIModal])
// Save handler for the External Knowledge API modal: forwards the submitted
// form value to the opener's `onSaveCallback` (when supplied), then clears the
// modal state. Declared `async`, so it returns a promise.
const handleSaveExternalApiModal = useCallback(async (updatedFormValue: CreateExternalAPIReq) => {
  showExternalKnowledgeAPIModal?.onSaveCallback?.(updatedFormValue)
  setShowExternalKnowledgeAPIModal(null)
}, [showExternalKnowledgeAPIModal])
// Edit handler for the External Knowledge API modal: forwards the submitted
// form value to the opener's `onEditCallback` (when supplied), then clears the
// modal state. Declared `async`, so it returns a promise.
const handleEditExternalApiModal = useCallback(async (updatedFormValue: CreateExternalAPIReq) => {
  showExternalKnowledgeAPIModal?.onEditCallback?.(updatedFormValue)
  setShowExternalKnowledgeAPIModal(null)
}, [showExternalKnowledgeAPIModal])
const handleCancelModelLoadBalancingEntryModal = useCallback(() => {
showModelLoadBalancingEntryModal?.onCancelCallback?.()
setShowModelLoadBalancingEntryModal(null)
@ -173,6 +199,7 @@ export const ModalContextProvider = ({
setShowPricingModal: () => setShowPricingModal(true),
setShowAnnotationFullModal: () => setShowAnnotationFullModal(true),
setShowModelModal,
setShowExternalKnowledgeAPIModal,
setShowModelLoadBalancingModal,
setShowModelLoadBalancingEntryModal,
}}>
@ -245,6 +272,18 @@ export const ModalContextProvider = ({
/>
)
}
{
!!showExternalKnowledgeAPIModal && (
<ExternalAPIModal
data={showExternalKnowledgeAPIModal.payload}
datasetBindings={showExternalKnowledgeAPIModal.datasetBindings ?? []}
onSave={handleSaveExternalApiModal}
onCancel={handleCancelExternalApiModal}
onEdit={handleEditExternalApiModal}
isEditMode={showExternalKnowledgeAPIModal.isEditMode ?? false}
/>
)
}
{
Boolean(showModelLoadBalancingModal) && (
<ModelLoadBalancingModal {...showModelLoadBalancingModal!} />

View File

@ -1,6 +1,7 @@
const translation = {
title: 'Retrieval Testing',
desc: 'Test the hitting effect of the Knowledge based on the given query text.',
settingTitle: 'Retrieval Setting',
desc: 'Test the hitting effect of the Knowledge based on the given query text',
dateTimeFormat: 'MM/DD/YYYY hh:mm A',
recents: 'Recents',
table: {
@ -23,6 +24,7 @@ const translation = {
},
noRecentTip: 'No recent query results here',
viewChart: 'View VECTOR CHART',
viewDetail: 'View Detail',
}
export default translation

View File

@ -1,13 +1,13 @@
const translation = {
title: 'Knowledge settings',
desc: 'Here you can modify the properties and working methods of the Knowledge.',
desc: 'Here you can modify the properties and retrieval settings of this Knowledge.',
form: {
name: 'Knowledge Name',
namePlaceholder: 'Please enter the Knowledge name',
nameError: 'Name cannot be empty',
desc: 'Knowledge description',
desc: 'Knowledge Description',
descInfo: 'Please write a clear textual description to outline the content of the Knowledge. This description will be used as a basis for matching when selecting from multiple Knowledge for inference.',
descPlaceholder: 'Describe what is in this Knowledge. A detailed description allows AI to access the content of the Knowledge in a timely manner. If empty, Dify will use the default hit strategy.',
descPlaceholder: 'Describe what\'s in this Knowledge (optional)',
descWrite: 'Learn how to write a good Knowledge description.',
permissions: 'Permissions',
permissionsOnlyMe: 'Only me',
@ -23,11 +23,14 @@ const translation = {
embeddingModelTip: 'Change the embedded model, please go to ',
embeddingModelTipLink: 'Settings',
retrievalSetting: {
title: 'Retrieval setting',
title: 'Retrieval Setting',
learnMore: 'Learn more',
description: ' about retrieval method.',
longDescription: ' about retrieval method, you can change this at any time in the Knowledge settings.',
},
externalKnowledgeAPI: 'External Knowledge API',
externalKnowledgeID: 'External Knowledge ID',
retrievalSettings: 'Retrieval Settings',
save: 'Save',
},
}

View File

@ -1,9 +1,65 @@
const translation = {
knowledge: 'Knowledge',
externalTag: 'External',
externalAPI: 'External API',
externalAPIPanelTitle: 'External Knowledge API',
externalKnowledgeId: 'External Knowledge ID',
externalKnowledgeName: 'External Knowledge Name',
externalKnowledgeDescription: 'Knowledge Description',
externalKnowledgeIdPlaceholder: 'Please enter the Knowledge ID',
externalKnowledgeNamePlaceholder: 'Please enter the name of the knowledge base',
externalKnowledgeDescriptionPlaceholder: 'Describe what\'s in this Knowledge Base (optional)',
learnHowToWriteGoodKnowledgeDescription: 'Learn how to write a good knowledge description',
externalAPIPanelDescription: 'The external knowledge API is used to connect to a knowledge base outside of Dify and retrieve knowledge from that knowledge base.',
externalAPIPanelDocumentation: 'Learn how to create an External Knowledge API',
documentCount: ' docs',
wordCount: ' k words',
appCount: ' linked apps',
createDataset: 'Create Knowledge',
createNewExternalAPI: 'Create a new External Knowledge API',
noExternalKnowledge: 'There is no External Knowledge API yet, click here to create',
createExternalAPI: 'Add an External Knowledge API',
editExternalAPIFormTitle: 'Edit the External Knowledge API',
editExternalAPITooltipTitle: 'LINKED KNOWLEDGE',
editExternalAPIConfirmWarningContent: {
front: 'This External Knowledge API is linked to',
end: 'external knowledge, and this modification will be applied to all of them. Are you sure you want to save this change?',
},
editExternalAPIFormWarning: {
front: 'This External API is linked to',
end: 'external knowledge',
},
deleteExternalAPIConfirmWarningContent: {
title: {
front: 'Delete',
end: '?',
},
content: {
front: 'This External Knowledge API is linked to',
end: 'external knowledge. Deleting this API will invalidate all of them. Are you sure you want to delete this API?',
},
noConnectionContent: 'Are you sure to delete this API?',
},
selectExternalKnowledgeAPI: {
placeholder: 'Choose an External Knowledge API',
},
connectDataset: 'Connect to an External Knowledge Base',
connectDatasetIntro: {
title: 'How to Connect to an External Knowledge Base',
content: {
front: 'To connect to an external knowledge base, you need to create an external API first. Please read carefully and refer to',
link: 'Learn how to create an external API',
end: '. Then find the corresponding knowledge ID and fill it in the form on the left. If all the information is correct, it will automatically jump to the retrieval test in the knowledge base after clicking the connect button.',
},
learnMore: 'Learn More',
},
connectHelper: {
helper1: 'Connect to external knowledge bases via API and knowledge base ID. Currently, ',
helper2: 'only the retrieval functionality is supported',
helper3: '. We strongly recommend that you ',
helper4: 'read the help documentation',
helper5: ' carefully before using this feature.',
},
createDatasetIntro: 'Import your own text data or write data in real-time via Webhook for LLM context enhancement.',
deleteDatasetConfirmTitle: 'Delete this Knowledge?',
deleteDatasetConfirmContent:
@ -22,6 +78,22 @@ const translation = {
unavailableTip: 'Embedding model is not available, the default embedding model needs to be configured',
datasets: 'KNOWLEDGE',
datasetsApi: 'API ACCESS',
externalKnowledgeForm: {
connect: 'Connect',
cancel: 'Cancel',
},
externalAPIForm: {
name: 'Name',
endpoint: 'API Endpoint',
apiKey: 'API Key',
save: 'Save',
cancel: 'Cancel',
edit: 'Edit',
encrypted: {
front: 'Your API Token will be encrypted and stored using',
end: 'technology.',
},
},
retrieval: {
semantic_search: {
title: 'Vector Search',
@ -58,6 +130,8 @@ const translation = {
defaultRetrievalTip: 'Multi-path retrieval is used by default. Knowledge is retrieved from multiple knowledge bases and then re-ranked.',
mixtureHighQualityAndEconomicTip: 'The Rerank model is required for mixture of high quality and economical knowledge bases.',
inconsistentEmbeddingModelTip: 'The Rerank model is required if the Embedding models of the selected knowledge bases are inconsistent.',
mixtureInternalAndExternalTip: 'The Rerank model is required for mixture of internal and external knowledge.',
allExternalTip: 'When using external knowledge only, the user can choose whether to enable the Rerank model. If not enabled, retrieved chunks will be sorted based on scores. When the retrieval strategies of different knowledge bases are inconsistent, it will be inaccurate.',
retrievalSettings: 'Retrieval Setting',
rerankSettings: 'Rerank Setting',
weightedScore: {

View File

@ -1,6 +1,7 @@
const translation = {
title: '召回测试',
desc: '基于给定的查询文本测试知识库的召回效果。',
settingTitle: '召回设置',
desc: '基于给定的查询文本测试知识库的召回效果',
dateTimeFormat: 'YYYY-MM-DD HH:mm',
recents: '最近查询',
table: {
@ -23,6 +24,7 @@ const translation = {
},
noRecentTip: '最近无查询结果',
viewChart: '查看向量图表',
viewDetail: '查看详情',
}
export default translation

View File

@ -1,13 +1,13 @@
const translation = {
title: '知识库设置',
desc: '在这里您可以修改知识库的工作方式以及其它设置。',
desc: '在这里,您可以修改此知识库的属性和检索设置',
form: {
name: '知识库名称',
namePlaceholder: '请输入知识库名称',
nameError: '名称不能为空',
desc: '知识库描述',
descInfo: '请写出清楚的文字描述来概述知识库的内容。当从多个知识库中进行选择匹配时,该描述将用作匹配的基础。',
descPlaceholder: '描述这个知识库中的内容。详细的描述可以让 AI 及时访问知识库的内容。如果为空Dify 将使用默认的命中策略。',
descPlaceholder: '请描述这个知识库包含的内容(可选)',
descWrite: '了解如何编写更好的知识库描述。',
permissions: '可见权限',
permissionsOnlyMe: '只有我',
@ -28,6 +28,8 @@ const translation = {
description: '关于检索方法。',
longDescription: '关于检索方法,您可以随时在知识库设置中更改此设置。',
},
externalKnowledgeAPI: '外部知识 API',
externalKnowledgeID: '外部知识库 ID',
save: '保存',
},
}

View File

@ -1,9 +1,62 @@
const translation = {
knowledge: '知识库',
externalTag: '外部',
externalAPI: '外部 API',
externalAPIPanelTitle: '外部知识库 API',
externalKnowledgeId: '外部知识库 ID',
externalKnowledgeName: '外部知识库名称',
externalKnowledgeDescription: '知识库描述',
externalKnowledgeIdPlaceholder: '请输入外部知识库 ID',
externalKnowledgeNamePlaceholder: '请输入外部知识库名称',
externalKnowledgeDescriptionPlaceholder: '描述知识库内容(可选)',
learnHowToWriteGoodKnowledgeDescription: '了解如何编写良好的知识库描述',
externalAPIPanelDescription: '外部知识库 API 用于连接到 Dify 之外的知识库并从中检索知识。',
externalAPIPanelDocumentation: '了解如何创建外部知识库 API',
documentCount: ' 文档',
wordCount: ' 千字符',
appCount: ' 关联应用',
createDataset: '创建知识库',
noExternalKnowledge: '还没有外部知识库 API点击此处创建',
createExternalAPI: '添加外部知识库 API',
createNewExternalAPI: '创建新的外部知识库API',
editExternalAPIFormTitle: '编辑外部知识库 API',
editExternalAPITooltipTitle: '个关联知识库',
editExternalAPIConfirmWarningContent: {
front: '此外部知识库 API 已链接到',
end: '个外部知识库,此修改将应用于所有这些知识库。您确定要保存此更改吗?',
},
editExternalAPIFormWarning: {
front: '此外部 API 已链接到',
end: '外部知识库',
},
deleteExternalAPIConfirmWarningContent: {
title: {
front: '删除',
end: '',
},
content: {
front: '此外部知识库 API 已链接到',
end: '个外部知识库。删除此 API 将使所有这些知识库失效。您确定要删除此 API 吗?',
},
noConnectionContent: '您确定要删除此 API 吗?',
},
connectDatasetIntro: {
title: '如何连接到外部知识库',
content: {
front: '要连接到外部知识库,您需要先创建一个外部 API。请仔细阅读并参考',
link: '了解如何创建外部 API',
end: '。然后找到相应的知识库 ID 并填写在左侧表单中。如果所有信息正确,点击连接按钮后将自动跳转到知识库中的检索测试。',
},
learnMore: '了解更多',
},
connectHelper: {
helper1: '通过 API 和知识库 ID 连接到外部知识库。目前,',
helper2: '仅支持检索功能',
helper3: '。我们强烈建议您在使用此功能之前',
helper4: '仔细阅读帮助文档',
helper5: '。',
},
connectDataset: '连接外部知识库',
createDatasetIntro: '导入您自己的文本数据或通过 Webhook 实时写入数据以增强 LLM 的上下文。',
deleteDatasetConfirmTitle: '要删除知识库吗?',
deleteDatasetConfirmContent:
@ -11,6 +64,9 @@ const translation = {
datasetUsedByApp: '某些应用正在使用该知识库。应用将无法再使用该知识库,所有的提示配置和日志将被永久删除。',
datasetDeleted: '知识库已删除',
datasetDeleteFailed: '删除知识库失败',
selectExternalKnowledgeAPI: {
placeholder: '选择一个外部知识 API',
},
didYouKnow: '你知道吗?',
intro1: '知识库可以被集成到 Dify 应用中',
intro2: '作为上下文',
@ -22,6 +78,22 @@ const translation = {
unavailableTip: '由于 embedding 模型不可用,需要配置默认 embedding 模型',
datasets: '知识库',
datasetsApi: 'API',
externalKnowledgeForm: {
connect: '连接',
cancel: '取消',
},
externalAPIForm: {
name: '名称',
endpoint: 'API 端点',
apiKey: 'API 密钥',
save: '保存',
cancel: '取消',
edit: '编辑',
encrypted: {
front: '您的 API Token 将使用',
end: '加密并存储。',
},
},
retrieval: {
semantic_search: {
title: '向量检索',
@ -58,6 +130,8 @@ const translation = {
defaultRetrievalTip: '默认情况下使用多路召回。从多个知识库中检索知识,然后重新排序。',
mixtureHighQualityAndEconomicTip: '混合使用高质量和经济型知识库需要配置 Rerank 模型。',
inconsistentEmbeddingModelTip: '当所选知识库配置的 Embedding 模型不一致时,需要配置 Rerank 模型。',
mixtureInternalAndExternalTip: '混合使用内部和外部知识时需要配置 Rerank 模型。',
allExternalTip: '仅使用外部知识时,用户可以选择是否启用 Rerank 模型。如果不启用,检索到的文本块将根据分数排序。当不同知识库的检索策略不一致时,结果可能不准确。',
retrievalSettings: '召回设置',
rerankSettings: 'Rerank 设置',
weightedScore: {

View File

@ -23,6 +23,7 @@ const translation = {
},
noRecentTip: '最近無查詢結果',
viewChart: '查看向量圖表',
viewDetail: '查看詳情',
}
export default translation

View File

@ -25,6 +25,7 @@ export type DataSet = {
app_count: number
document_count: number
word_count: number
provider: string
embedding_model: string
embedding_model_provider: string
embedding_available: boolean
@ -32,6 +33,58 @@ export type DataSet = {
retrieval_model: RetrievalConfig
tags: Tag[]
partial_member_list?: any[]
external_knowledge_info: {
external_knowledge_id: string
external_knowledge_api_id: string
external_knowledge_api_name: string
external_knowledge_api_endpoint: string
}
external_retrieval_model: {
top_k: number
score_threshold: number
score_threshold_enabled: boolean
}
}
/**
 * One external knowledge API record.
 *
 * `settings` holds the connection details (endpoint and API key) and
 * `dataset_bindings` lists the datasets bound to this API by id and name.
 */
export type ExternalAPIItem = {
  id: string
  tenant_id: string
  name: string
  description: string
  // Connection details for the external API.
  settings: {
    endpoint: string
    api_key: string
  }
  // Datasets bound to this API.
  dataset_bindings: Array<{ id: string; name: string }>
  created_by: string
  created_at: string
}
/**
 * Dataset record whose provider is fixed to `'external'`.
 *
 * Used as the response type of `createExternalKnowledgeBase`.
 */
export type ExternalKnowledgeItem = {
id: string
name: string
description: string | null
// Literal type: this shape only describes externally-provided datasets.
provider: 'external'
permission: DatasetPermission
// Both fields are typed as always `null` for this shape.
data_source_type: null
indexing_technique: null
app_count: number
document_count: number
word_count: number
created_by: string
created_at: string
updated_by: string
updated_at: string
tags: Tag[]
}
/** Response type of `deleteExternalAPI`; `result` is either `'success'` or `'error'`. */
export type ExternalAPIDeleteResponse = {
result: 'success' | 'error'
}
/** Response type of `checkUsageExternalAPI` (the `/use-check` endpoint). */
export type ExternalAPIUsage = {
// Presumably true when at least one dataset references the API — confirm against the backend.
is_using: boolean
// Presumably the number of referencing datasets — confirm against the backend.
count: number
}
export type CustomFile = File & {
@ -73,6 +126,14 @@ export type DataSetListResponse = {
total: number
}
/**
 * Response type of `fetchExternalAPIList`: a list of external API records
 * together with paging fields (`page`, `limit`, `total`, `has_more`).
 */
export type ExternalAPIListResponse = {
  data: Array<ExternalAPIItem>
  has_more: boolean
  limit: number
  page: number
  total: number
}
export type QA = {
question: string
answer: string
@ -385,6 +446,16 @@ export type HitTesting = {
tsne_position: TsnePosition
}
/** A single record in an external knowledge base hit-testing response. */
export type ExternalKnowledgeBaseHitTesting = {
content: string
title: string
score: number
// NOTE(review): both keys carry the `x-amz-bedrock-kb-` prefix, so this shape
// presumably mirrors Amazon Bedrock knowledge-base metadata — confirm whether
// other external providers return different keys.
metadata: {
'x-amz-bedrock-kb-source-uri': string
'x-amz-bedrock-kb-data-source-id': string
}
}
export type Segment = {
id: string
document: Document
@ -425,6 +496,13 @@ export type HitTestingResponse = {
records: Array<HitTesting>
}
/**
 * Response type of `externalKnowledgeBaseHitTesting`: a `query` object with
 * its `content`, plus the list of returned records.
 */
export type ExternalKnowledgeBaseHitTestingResponse = {
  query: {
    content: string
  }
  records: ExternalKnowledgeBaseHitTesting[]
}
export type RelatedApp = {
id: string
name: string
@ -462,6 +540,9 @@ export type SelectedDatasetsMode = {
allHighQualityFullTextSearch: boolean
allEconomic: boolean
mixtureHighQualityAndEconomic: boolean
allInternal: boolean
allExternal: boolean
mixtureInternalAndExternal: boolean
inconsistentEmbeddingModel: boolean
}

View File

@ -8,6 +8,12 @@ import type {
DocumentDetailResponse,
DocumentListResponse,
ErrorDocsResponse,
ExternalAPIDeleteResponse,
ExternalAPIItem,
ExternalAPIListResponse,
ExternalAPIUsage,
ExternalKnowledgeBaseHitTestingResponse,
ExternalKnowledgeItem,
FileIndexingEstimateResponse,
HitTestingRecordsResponse,
HitTestingResponse,
@ -23,7 +29,9 @@ import type {
SegmentsResponse,
createDocumentResponse,
} from '@/models/datasets'
import { type CommonResponse, type DataSourceNotionWorkspace, DataSourceProvider } from '@/models/common'
import type { CreateKnowledgeBaseReq } from '@/app/components/datasets/external-knowledge-base/create/declarations'
import type { CreateExternalAPIReq } from '@/app/components/datasets/external-api/declarations.ts'
import type { CommonResponse, DataSourceNotionWorkspace } from '@/models/common'
import type {
ApiKeysListResponse,
CreateApiKeyResponse,
@ -82,6 +90,34 @@ export const deleteDataset: Fetcher<DataSet, string> = (datasetID) => {
return del<DataSet>(`/datasets/${datasetID}`)
}
/** GET the caller-supplied `url` and type the result as an external API list. */
export const fetchExternalAPIList: Fetcher<ExternalAPIListResponse, { url: string }> = ({ url }) =>
  get<ExternalAPIListResponse>(url)
/** GET a single external knowledge API record by its id. */
export const fetchExternalAPI: Fetcher<ExternalAPIItem, { apiTemplateId: string }> = ({ apiTemplateId }) =>
  get<ExternalAPIItem>(`/datasets/external-knowledge-api/${apiTemplateId}`)
/** PATCH the external knowledge API identified by `apiTemplateId` with `body`. */
export const updateExternalAPI: Fetcher<ExternalAPIItem, { apiTemplateId: string; body: ExternalAPIItem }> = ({ apiTemplateId, body }) =>
  patch<ExternalAPIItem>(`/datasets/external-knowledge-api/${apiTemplateId}`, { body })
/** DELETE the external knowledge API identified by `apiTemplateId`. */
export const deleteExternalAPI: Fetcher<ExternalAPIDeleteResponse, { apiTemplateId: string }> = ({ apiTemplateId }) =>
  del<ExternalAPIDeleteResponse>(`/datasets/external-knowledge-api/${apiTemplateId}`)
/** GET the `use-check` endpoint for the external knowledge API `apiTemplateId`. */
export const checkUsageExternalAPI: Fetcher<ExternalAPIUsage, { apiTemplateId: string }> = ({ apiTemplateId }) =>
  get<ExternalAPIUsage>(`/datasets/external-knowledge-api/${apiTemplateId}/use-check`)
/** POST `body` to create a new external knowledge API. */
export const createExternalAPI: Fetcher<ExternalAPIItem, { body: CreateExternalAPIReq }> = ({ body }) =>
  post<ExternalAPIItem>('/datasets/external-knowledge-api', { body })
/** POST `body` to `/datasets/external` to create an external knowledge base. */
export const createExternalKnowledgeBase: Fetcher<ExternalKnowledgeItem, { body: CreateKnowledgeBaseReq }> = ({ body }) =>
  post<ExternalKnowledgeItem>('/datasets/external', { body })
/** GET the caller-supplied `url` and type the result as a process-rule response. */
export const fetchDefaultProcessRule: Fetcher<ProcessRuleResponse, { url: string }> = ({ url }) =>
  get<ProcessRuleResponse>(url)
@ -209,6 +245,10 @@ export const hitTesting: Fetcher<HitTestingResponse, { datasetId: string; queryT
return post<HitTestingResponse>(`/datasets/${datasetId}/hit-testing`, { body: { query: queryText, retrieval_model } })
}
/**
 * POST a hit-testing query for the dataset `datasetId` to its
 * `external-hit-testing` endpoint.
 *
 * The request body carries `query` and `external_retrieval_model`
 * (top_k / score_threshold / score_threshold_enabled) unchanged.
 */
export const externalKnowledgeBaseHitTesting: Fetcher<ExternalKnowledgeBaseHitTestingResponse, { datasetId: string; query: string; external_retrieval_model: { top_k: number; score_threshold: number; score_threshold_enabled: boolean } }> = ({ datasetId, query, external_retrieval_model }) => {
  const endpoint = `/datasets/${datasetId}/external-hit-testing`
  const body = { query, external_retrieval_model }
  return post<ExternalKnowledgeBaseHitTestingResponse>(endpoint, { body })
}
/** GET the `queries` endpoint of dataset `datasetId`, passing `params` (page / limit) as query parameters. */
export const fetchTestingRecords: Fetcher<HitTestingRecordsResponse, { datasetId: string; params: { page: number; limit: number } }> = ({ datasetId, params }) =>
  get<HitTestingRecordsResponse>(`/datasets/${datasetId}/queries`, { params })