Merge branch 'main' into feat/iteration-node-parallel

This commit is contained in:
Nov1c444 2024-10-18 16:58:44 +08:00
commit 00127b6ccf
131 changed files with 1281 additions and 378 deletions

View File

@ -10,44 +10,20 @@ if os.environ.get("DEBUG", "false").lower() != "true":
grpc.experimental.gevent.init_gevent() grpc.experimental.gevent.init_gevent()
import json import json
import logging
import sys
import threading import threading
import time import time
import warnings import warnings
from logging.handlers import RotatingFileHandler
from flask import Flask, Response, request from flask import Response
from flask_cors import CORS
from werkzeug.exceptions import Unauthorized
import contexts from app_factory import create_app
from commands import register_commands
from configs import dify_config
# DO NOT REMOVE BELOW # DO NOT REMOVE BELOW
from events import event_handlers # noqa: F401 from events import event_handlers # noqa: F401
from extensions import (
ext_celery,
ext_code_based_extension,
ext_compress,
ext_database,
ext_hosting_provider,
ext_login,
ext_mail,
ext_migrate,
ext_proxy_fix,
ext_redis,
ext_sentry,
ext_storage,
)
from extensions.ext_database import db from extensions.ext_database import db
from extensions.ext_login import login_manager
from libs.passport import PassportService
# TODO: Find a way to avoid importing models here # TODO: Find a way to avoid importing models here
from models import account, dataset, model, source, task, tool, tools, web # noqa: F401 from models import account, dataset, model, source, task, tool, tools, web # noqa: F401
from services.account_service import AccountService
# DO NOT REMOVE ABOVE # DO NOT REMOVE ABOVE
@ -60,188 +36,12 @@ if hasattr(time, "tzset"):
time.tzset() time.tzset()
class DifyApp(Flask):
pass
# ------------- # -------------
# Configuration # Configuration
# ------------- # -------------
config_type = os.getenv("EDITION", default="SELF_HOSTED") # ce edition first config_type = os.getenv("EDITION", default="SELF_HOSTED") # ce edition first
# ----------------------------
# Application Factory Function
# ----------------------------
def create_flask_app_with_configs() -> Flask:
"""
create a raw flask app
with configs loaded from .env file
"""
dify_app = DifyApp(__name__)
dify_app.config.from_mapping(dify_config.model_dump())
# populate configs into system environment variables
for key, value in dify_app.config.items():
if isinstance(value, str):
os.environ[key] = value
elif isinstance(value, int | float | bool):
os.environ[key] = str(value)
elif value is None:
os.environ[key] = ""
return dify_app
def create_app() -> Flask:
app = create_flask_app_with_configs()
app.secret_key = app.config["SECRET_KEY"]
log_handlers = None
log_file = app.config.get("LOG_FILE")
if log_file:
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)
log_handlers = [
RotatingFileHandler(
filename=log_file,
maxBytes=1024 * 1024 * 1024,
backupCount=5,
),
logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
level=app.config.get("LOG_LEVEL"),
format=app.config.get("LOG_FORMAT"),
datefmt=app.config.get("LOG_DATEFORMAT"),
handlers=log_handlers,
force=True,
)
log_tz = app.config.get("LOG_TZ")
if log_tz:
from datetime import datetime
import pytz
timezone = pytz.timezone(log_tz)
def time_converter(seconds):
return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
for handler in logging.root.handlers:
handler.formatter.converter = time_converter
initialize_extensions(app)
register_blueprints(app)
register_commands(app)
return app
def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app)
ext_compress.init_app(app)
ext_code_based_extension.init()
ext_database.init_app(app)
ext_migrate.init(app, db)
ext_redis.init_app(app)
ext_storage.init_app(app)
ext_celery.init_app(app)
ext_login.init_app(app)
ext_mail.init_app(app)
ext_hosting_provider.init_app(app)
ext_sentry.init_app(app)
ext_proxy_fix.init_app(app)
# Flask-Login configuration
@login_manager.request_loader
def load_user_from_request(request_from_flask_login):
"""Load user based on the request."""
if request.blueprint not in {"console", "inner_api"}:
return None
# Check if the user_id contains a dot, indicating the old format
auth_header = request.headers.get("Authorization", "")
if not auth_header:
auth_token = request.args.get("_token")
if not auth_token:
raise Unauthorized("Invalid Authorization token.")
else:
if " " not in auth_header:
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(auth_token)
user_id = decoded.get("user_id")
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
if logged_in_account:
contexts.tenant_id.set(logged_in_account.current_tenant_id)
return logged_in_account
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(
json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
status=401,
content_type="application/json",
)
# register blueprint routers
def register_blueprints(app):
from controllers.console import bp as console_app_bp
from controllers.files import bp as files_bp
from controllers.inner_api import bp as inner_api_bp
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
CORS(
service_api_bp,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
)
app.register_blueprint(service_api_bp)
CORS(
web_bp,
resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(web_bp)
CORS(
console_app_bp,
resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(console_app_bp)
CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
app.register_blueprint(files_bp)
app.register_blueprint(inner_api_bp)
# create app # create app
app = create_app() app = create_app()
celery = app.extensions["celery"] celery = app.extensions["celery"]

213
api/app_factory.py Normal file
View File

@ -0,0 +1,213 @@
import os
if os.environ.get("DEBUG", "false").lower() != "true":
from gevent import monkey
monkey.patch_all()
import grpc.experimental.gevent
grpc.experimental.gevent.init_gevent()
import json
import logging
import sys
from logging.handlers import RotatingFileHandler
from flask import Flask, Response, request
from flask_cors import CORS
from werkzeug.exceptions import Unauthorized
import contexts
from commands import register_commands
from configs import dify_config
from extensions import (
ext_celery,
ext_code_based_extension,
ext_compress,
ext_database,
ext_hosting_provider,
ext_login,
ext_mail,
ext_migrate,
ext_proxy_fix,
ext_redis,
ext_sentry,
ext_storage,
)
from extensions.ext_database import db
from extensions.ext_login import login_manager
from libs.passport import PassportService
from services.account_service import AccountService
class DifyApp(Flask):
pass
# ----------------------------
# Application Factory Function
# ----------------------------
def create_flask_app_with_configs() -> Flask:
"""
create a raw flask app
with configs loaded from .env file
"""
dify_app = DifyApp(__name__)
dify_app.config.from_mapping(dify_config.model_dump())
# populate configs into system environment variables
for key, value in dify_app.config.items():
if isinstance(value, str):
os.environ[key] = value
elif isinstance(value, int | float | bool):
os.environ[key] = str(value)
elif value is None:
os.environ[key] = ""
return dify_app
def create_app() -> Flask:
app = create_flask_app_with_configs()
app.secret_key = app.config["SECRET_KEY"]
log_handlers = None
log_file = app.config.get("LOG_FILE")
if log_file:
log_dir = os.path.dirname(log_file)
os.makedirs(log_dir, exist_ok=True)
log_handlers = [
RotatingFileHandler(
filename=log_file,
maxBytes=1024 * 1024 * 1024,
backupCount=5,
),
logging.StreamHandler(sys.stdout),
]
logging.basicConfig(
level=app.config.get("LOG_LEVEL"),
format=app.config.get("LOG_FORMAT"),
datefmt=app.config.get("LOG_DATEFORMAT"),
handlers=log_handlers,
force=True,
)
log_tz = app.config.get("LOG_TZ")
if log_tz:
from datetime import datetime
import pytz
timezone = pytz.timezone(log_tz)
def time_converter(seconds):
return datetime.utcfromtimestamp(seconds).astimezone(timezone).timetuple()
for handler in logging.root.handlers:
handler.formatter.converter = time_converter
initialize_extensions(app)
register_blueprints(app)
register_commands(app)
return app
def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app)
ext_compress.init_app(app)
ext_code_based_extension.init()
ext_database.init_app(app)
ext_migrate.init(app, db)
ext_redis.init_app(app)
ext_storage.init_app(app)
ext_celery.init_app(app)
ext_login.init_app(app)
ext_mail.init_app(app)
ext_hosting_provider.init_app(app)
ext_sentry.init_app(app)
ext_proxy_fix.init_app(app)
# Flask-Login configuration
@login_manager.request_loader
def load_user_from_request(request_from_flask_login):
"""Load user based on the request."""
if request.blueprint not in {"console", "inner_api"}:
return None
# Check if the user_id contains a dot, indicating the old format
auth_header = request.headers.get("Authorization", "")
if not auth_header:
auth_token = request.args.get("_token")
if not auth_token:
raise Unauthorized("Invalid Authorization token.")
else:
if " " not in auth_header:
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != "bearer":
raise Unauthorized("Invalid Authorization header format. Expected 'Bearer <api-key>' format.")
decoded = PassportService().verify(auth_token)
user_id = decoded.get("user_id")
logged_in_account = AccountService.load_logged_in_account(account_id=user_id)
if logged_in_account:
contexts.tenant_id.set(logged_in_account.current_tenant_id)
return logged_in_account
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(
json.dumps({"code": "unauthorized", "message": "Unauthorized."}),
status=401,
content_type="application/json",
)
# register blueprint routers
def register_blueprints(app):
from controllers.console import bp as console_app_bp
from controllers.files import bp as files_bp
from controllers.inner_api import bp as inner_api_bp
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
CORS(
service_api_bp,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
)
app.register_blueprint(service_api_bp)
CORS(
web_bp,
resources={r"/*": {"origins": app.config["WEB_API_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization", "X-App-Code"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(web_bp)
CORS(
console_app_bp,
resources={r"/*": {"origins": app.config["CONSOLE_CORS_ALLOW_ORIGINS"]}},
supports_credentials=True,
allow_headers=["Content-Type", "Authorization"],
methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"],
expose_headers=["X-Version", "X-Env"],
)
app.register_blueprint(console_app_bp)
CORS(files_bp, allow_headers=["Content-Type"], methods=["GET", "PUT", "POST", "DELETE", "OPTIONS", "PATCH"])
app.register_blueprint(files_bp)
app.register_blueprint(inner_api_bp)

View File

@ -259,6 +259,25 @@ def migrate_knowledge_vector_database():
skipped_count = 0 skipped_count = 0
total_count = 0 total_count = 0
vector_type = dify_config.VECTOR_STORE vector_type = dify_config.VECTOR_STORE
upper_colletion_vector_types = {
VectorType.MILVUS,
VectorType.PGVECTOR,
VectorType.RELYT,
VectorType.WEAVIATE,
VectorType.ORACLE,
VectorType.ELASTICSEARCH,
}
lower_colletion_vector_types = {
VectorType.ANALYTICDB,
VectorType.CHROMA,
VectorType.MYSCALE,
VectorType.PGVECTO_RS,
VectorType.TIDB_VECTOR,
VectorType.OPENSEARCH,
VectorType.TENCENT,
VectorType.BAIDU,
VectorType.VIKINGDB,
}
page = 1 page = 1
while True: while True:
try: try:
@ -284,11 +303,9 @@ def migrate_knowledge_vector_database():
skipped_count = skipped_count + 1 skipped_count = skipped_count + 1
continue continue
collection_name = "" collection_name = ""
if vector_type == VectorType.WEAVIATE: dataset_id = dataset.id
dataset_id = dataset.id if vector_type in upper_colletion_vector_types:
collection_name = Dataset.gen_collection_name_by_id(dataset_id) collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.WEAVIATE, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.QDRANT: elif vector_type == VectorType.QDRANT:
if dataset.collection_binding_id: if dataset.collection_binding_id:
dataset_collection_binding = ( dataset_collection_binding = (
@ -301,63 +318,15 @@ def migrate_knowledge_vector_database():
else: else:
raise ValueError("Dataset Collection Binding not found") raise ValueError("Dataset Collection Binding not found")
else: else:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id) collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.QDRANT, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.MILVUS: elif vector_type in lower_colletion_vector_types:
dataset_id = dataset.id collection_name = Dataset.gen_collection_name_by_id(dataset_id).lower()
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.MILVUS, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.RELYT:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": "relyt", "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.TENCENT:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.TENCENT, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.PGVECTOR:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": VectorType.PGVECTOR, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.OPENSEARCH:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.OPENSEARCH,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.ANALYTICDB:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.ANALYTICDB,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.ELASTICSEARCH:
dataset_id = dataset.id
index_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {"type": "elasticsearch", "vector_store": {"class_prefix": index_name}}
dataset.index_struct = json.dumps(index_struct_dict)
elif vector_type == VectorType.BAIDU:
dataset_id = dataset.id
collection_name = Dataset.gen_collection_name_by_id(dataset_id)
index_struct_dict = {
"type": VectorType.BAIDU,
"vector_store": {"class_prefix": collection_name},
}
dataset.index_struct = json.dumps(index_struct_dict)
else: else:
raise ValueError(f"Vector store {vector_type} is not supported.") raise ValueError(f"Vector store {vector_type} is not supported.")
index_struct_dict = {"type": vector_type, "vector_store": {"class_prefix": collection_name}}
dataset.index_struct = json.dumps(index_struct_dict)
vector = Vector(dataset) vector = Vector(dataset)
click.echo(f"Migrating dataset {dataset.id}.") click.echo(f"Migrating dataset {dataset.id}.")

View File

@ -3,7 +3,7 @@ import os
from collections.abc import Callable, Generator, Sequence from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast from typing import IO, Optional, Union, cast
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError from core.errors.error import ProviderTokenNotInitError

View File

@ -4,7 +4,7 @@ from typing import Optional
from pydantic import ConfigDict from pydantic import ConfigDict
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.__base.ai_model import AIModel from core.model_runtime.model_providers.__base.ai_model import AIModel

View File

@ -7,7 +7,7 @@ import numpy as np
import tiktoken import tiktoken
from openai import AzureOpenAI from openai import AzureOpenAI
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import AIModelEntity, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@ -4,7 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -13,7 +13,7 @@ from botocore.exceptions import (
UnknownServiceError, UnknownServiceError,
) )
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -5,7 +5,7 @@ import cohere
import numpy as np import numpy as np
from cohere.core import RequestOptions from cohere.core import RequestOptions
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -5,7 +5,7 @@ from typing import Optional, Union
import numpy as np import numpy as np
from openai import OpenAI from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@ -6,7 +6,7 @@ import numpy as np
import requests import requests
from huggingface_hub import HfApi, InferenceClient from huggingface_hub import HfApi, InferenceClient
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -1,7 +1,7 @@
import time import time
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -9,7 +9,7 @@ from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.hunyuan.v20230901 import hunyuan_client, models from tencentcloud.hunyuan.v20230901 import hunyuan_client, models
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -4,7 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -5,7 +5,7 @@ from typing import Optional
from requests import post from requests import post
from yarl import URL from yarl import URL
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -4,7 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -4,7 +4,7 @@ from typing import Optional
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -5,7 +5,7 @@ from typing import Optional
from nomic import embed from nomic import embed
from nomic import login as nomic_login from nomic import login as nomic_login
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import ( from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage, EmbeddingUsage,

View File

@ -4,7 +4,7 @@ from typing import Optional
from requests import post from requests import post
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -6,7 +6,7 @@ from typing import Optional
import numpy as np import numpy as np
import oci import oci
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -8,7 +8,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,

View File

@ -6,7 +6,7 @@ import numpy as np
import tiktoken import tiktoken
from openai import OpenAI from openai import OpenAI
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@ -7,7 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,

View File

@ -5,7 +5,7 @@ from typing import Optional
from requests import post from requests import post
from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema from requests.exceptions import ConnectionError, InvalidSchema, MissingSchema
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import ( from core.model_runtime.errors.invoke import (

View File

@ -35,6 +35,15 @@ parameter_rules:
help: help:
zh_Hans: 控制生成结果的随机性。数值越小随机性越弱数值越大随机性越强。一般而言top_p 和 temperature 两个参数选择一个进行调整即可。 zh_Hans: 控制生成结果的随机性。数值越小随机性越弱数值越大随机性越强。一般而言top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature. en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
default: 0 default: 0

View File

@ -18,6 +18,15 @@ parameter_rules:
min: 0 min: 0
max: 1 max: 1
default: 1 default: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
min: 1 min: 1

View File

@ -14,6 +14,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -14,6 +14,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -14,6 +14,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -16,6 +16,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -15,6 +15,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -15,6 +15,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -10,6 +10,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
required: true required: true

View File

@ -10,6 +10,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
required: true required: true

View File

@ -10,6 +10,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
required: true required: true

View File

@ -10,6 +10,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
required: true required: true

View File

@ -10,6 +10,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
required: true required: true

View File

@ -18,6 +18,15 @@ parameter_rules:
default: 1 default: 1
min: 0 min: 0
max: 1 max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
default: 1024 default: 1024

View File

@ -18,6 +18,15 @@ parameter_rules:
default: 1 default: 1
min: 0 min: 0
max: 1 max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
default: 1024 default: 1024

View File

@ -19,6 +19,15 @@ parameter_rules:
default: 1 default: 1
min: 0 min: 0
max: 1 max: 1
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: max_tokens - name: max_tokens
use_template: max_tokens use_template: max_tokens
default: 1024 default: 1024

View File

@ -12,6 +12,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -12,6 +12,15 @@ parameter_rules:
use_template: temperature use_template: temperature
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: presence_penalty - name: presence_penalty
use_template: presence_penalty use_template: presence_penalty
- name: frequency_penalty - name: frequency_penalty

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -7,7 +7,7 @@ from urllib.parse import urljoin
import numpy as np import numpy as np
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,

View File

@ -4,7 +4,7 @@ from typing import Optional
from replicate import Client as ReplicateClient from replicate import Client as ReplicateClient
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -14,6 +14,7 @@ from core.model_runtime.errors.invoke import (
InvokeRateLimitError, InvokeRateLimitError,
InvokeServerUnavailableError, InvokeServerUnavailableError,
) )
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel from core.model_runtime.model_providers.__base.speech2text_model import Speech2TextModel
from core.model_runtime.model_providers.sagemaker.sagemaker import generate_presigned_url from core.model_runtime.model_providers.sagemaker.sagemaker import generate_presigned_url
@ -77,7 +78,8 @@ class SageMakerSpeech2TextModel(Speech2TextModel):
json_obj = json.loads(json_str) json_obj = json.loads(json_str)
asr_text = json_obj["text"] asr_text = json_obj["text"]
except Exception as e: except Exception as e:
logger.exception(f"Exception {e}, line : {line}") logger.exception(f"failed to invoke speech2text model, {e}")
raise CredentialsValidateFailedError(str(e))
return asr_text return asr_text

View File

@ -6,7 +6,7 @@ from typing import Any, Optional
import boto3 import boto3
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -21,6 +21,15 @@ parameter_rules:
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter. en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p - name: top_p
use_template: top_p use_template: top_p
- name: top_k
label:
zh_Hans: 取样数量
en_US: Top k
type: int
help:
zh_Hans: 仅从每个后续标记的前 K 个选项中采样。
en_US: Only sample from the top K options for each subsequent token.
required: false
- name: frequency_penalty - name: frequency_penalty
use_template: frequency_penalty use_template: frequency_penalty
pricing: pricing:

View File

@ -1,6 +1,6 @@
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import TextEmbeddingResult
from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import ( from core.model_runtime.model_providers.openai_api_compatible.text_embedding.text_embedding import (
OAICompatEmbeddingModel, OAICompatEmbeddingModel,

View File

@ -4,7 +4,7 @@ from typing import Optional
import dashscope import dashscope
import numpy as np import numpy as np
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import ( from core.model_runtime.entities.text_embedding_entities import (
EmbeddingUsage, EmbeddingUsage,

View File

@ -7,7 +7,7 @@ import numpy as np
from openai import OpenAI from openai import OpenAI
from tokenizers import Tokenizer from tokenizers import Tokenizer
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@ -9,7 +9,7 @@ from google.cloud import aiplatform
from google.oauth2 import service_account from google.oauth2 import service_account
from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel from vertexai.language_models import TextEmbeddingModel as VertexTextEmbeddingModel
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,

View File

@ -2,7 +2,7 @@ import time
from decimal import Decimal from decimal import Decimal
from typing import Optional from typing import Optional
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import ( from core.model_runtime.entities.model_entities import (
AIModelEntity, AIModelEntity,

View File

@ -4,7 +4,7 @@ from typing import Optional
import requests import requests
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -120,6 +120,7 @@ class _CommonWenxin:
"bge-large-en": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_en", "bge-large-en": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_en",
"bge-large-zh": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_zh", "bge-large-zh": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/bge_large_zh",
"tao-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/tao_8k", "tao-8k": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/embeddings/tao_8k",
"bce-reranker-base_v1": "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/reranker/bce_reranker_base",
} }
function_calling_supports = [ function_calling_supports = [

View File

@ -0,0 +1,8 @@
model: bce-reranker-base_v1
model_type: rerank
model_properties:
context_size: 4096
pricing:
input: '0.0005'
unit: '0.001'
currency: RMB

View File

@ -0,0 +1,147 @@
from typing import Optional
import httpx
from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType
from core.model_runtime.entities.rerank_entities import RerankDocument, RerankResult
from core.model_runtime.errors.invoke import (
InvokeAuthorizationError,
InvokeBadRequestError,
InvokeConnectionError,
InvokeError,
InvokeRateLimitError,
InvokeServerUnavailableError,
)
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.rerank_model import RerankModel
from core.model_runtime.model_providers.wenxin._common import _CommonWenxin
class WenxinRerank(_CommonWenxin):
def rerank(self, model: str, query: str, docs: list[str], top_n: Optional[int] = None):
access_token = self._get_access_token()
url = f"{self.api_bases[model]}?access_token={access_token}"
try:
response = httpx.post(
url,
json={"model": model, "query": query, "documents": docs, "top_n": top_n},
headers={"Content-Type": "application/json"},
)
response.raise_for_status()
return response.json()
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
class WenxinRerankModel(RerankModel):
"""
Model class for wenxin rerank model.
"""
def _invoke(
self,
model: str,
credentials: dict,
query: str,
docs: list[str],
score_threshold: Optional[float] = None,
top_n: Optional[int] = None,
user: Optional[str] = None,
) -> RerankResult:
"""
Invoke rerank model
:param model: model name
:param credentials: model credentials
:param query: search query
:param docs: docs for reranking
:param score_threshold: score threshold
:param top_n: top n documents to return
:param user: unique user id
:return: rerank result
"""
if len(docs) == 0:
return RerankResult(model=model, docs=[])
api_key = credentials["api_key"]
secret_key = credentials["secret_key"]
wenxin_rerank: WenxinRerank = WenxinRerank(api_key, secret_key)
try:
results = wenxin_rerank.rerank(model, query, docs, top_n)
rerank_documents = []
for result in results["results"]:
index = result["index"]
if "document" in result:
text = result["document"]
else:
# llama.cpp rerank maynot return original documents
text = docs[index]
rerank_document = RerankDocument(
index=index,
text=text,
score=result["relevance_score"],
)
if score_threshold is None or result["relevance_score"] >= score_threshold:
rerank_documents.append(rerank_document)
return RerankResult(model=model, docs=rerank_documents)
except httpx.HTTPStatusError as e:
raise InvokeServerUnavailableError(str(e))
def validate_credentials(self, model: str, credentials: dict) -> None:
"""
Validate model credentials
:param model: model name
:param credentials: model credentials
:return:
"""
try:
self._invoke(
model=model,
credentials=credentials,
query="What is the capital of the United States?",
docs=[
"Carson City is the capital city of the American state of Nevada. At the 2010 United States "
"Census, Carson City had a population of 55,274.",
"The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that "
"are a political division controlled by the United States. Its capital is Saipan.",
],
score_threshold=0.8,
)
except Exception as ex:
raise CredentialsValidateFailedError(str(ex))
@property
def _invoke_error_mapping(self) -> dict[type[InvokeError], list[type[Exception]]]:
"""
Map model invoke error to unified error
"""
return {
InvokeConnectionError: [httpx.ConnectError],
InvokeServerUnavailableError: [httpx.RemoteProtocolError],
InvokeRateLimitError: [],
InvokeAuthorizationError: [httpx.HTTPStatusError],
InvokeBadRequestError: [httpx.RequestError],
}
def get_customizable_model_schema(self, model: str, credentials: dict) -> AIModelEntity:
"""
generate custom model entities from credentials
"""
entity = AIModelEntity(
model=model,
label=I18nObject(en_US=model),
model_type=ModelType.RERANK,
fetch_from=FetchFrom.CUSTOMIZABLE_MODEL,
model_properties={ModelPropertyKey.CONTEXT_SIZE: int(credentials.get("context_size"))},
)
return entity

View File

@ -7,7 +7,7 @@ from typing import Any, Optional
import numpy as np import numpy as np
from requests import Response, post from requests import Response, post
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.invoke import InvokeError from core.model_runtime.errors.invoke import InvokeError

View File

@ -18,6 +18,7 @@ help:
supported_model_types: supported_model_types:
- llm - llm
- text-embedding - text-embedding
- rerank
configurate_methods: configurate_methods:
- predefined-model - predefined-model
provider_credential_schema: provider_credential_schema:

View File

@ -3,7 +3,7 @@ from typing import Optional
from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle from xinference_client.client.restful.restful_client import Client, RESTfulEmbeddingModelHandle
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.common_entities import I18nObject from core.model_runtime.entities.common_entities import I18nObject
from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType from core.model_runtime.entities.model_entities import AIModelEntity, FetchFrom, ModelPropertyKey, ModelType, PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult

View File

@ -7,3 +7,4 @@
- yi-medium-200k - yi-medium-200k
- yi-spark - yi-spark
- yi-large-turbo - yi-large-turbo
- yi-lightning

View File

@ -0,0 +1,43 @@
model: yi-lightning
label:
zh_Hans: yi-lightning
en_US: yi-lightning
model_type: llm
features:
- agent-thought
model_properties:
mode: chat
context_size: 16384
parameter_rules:
- name: temperature
use_template: temperature
type: float
default: 0.3
min: 0.0
max: 2.0
help:
zh_Hans: 控制生成结果的多样性和随机性。数值越小,越严谨;数值越大,越发散。
en_US: Control the diversity and randomness of generated results. The smaller the value, the more rigorous it is; the larger the value, the more divergent it is.
- name: max_tokens
use_template: max_tokens
type: int
default: 1024
min: 1
max: 4000
help:
zh_Hans: 指定生成结果长度的上限。如果生成结果截断,可以调大该参数。
en_US: Specifies the upper limit on the length of generated results. If the generated results are truncated, you can increase this parameter.
- name: top_p
use_template: top_p
type: float
default: 0.9
min: 0.01
max: 1.00
help:
zh_Hans: 控制生成结果的随机性。数值越小随机性越弱数值越大随机性越强。一般而言top_p 和 temperature 两个参数选择一个进行调整即可。
en_US: Control the randomness of generated results. The smaller the value, the weaker the randomness; the larger the value, the stronger the randomness. Generally speaking, you can adjust one of the two parameters top_p and temperature.
pricing:
input: '0.99'
output: '0.99'
unit: '0.000001'
currency: RMB

View File

@ -3,7 +3,7 @@ from typing import Optional
from zhipuai import ZhipuAI from zhipuai import ZhipuAI
from core.embedding.embedding_constant import EmbeddingInputType from core.entities.embedding_type import EmbeddingInputType
from core.model_runtime.entities.model_entities import PriceType from core.model_runtime.entities.model_entities import PriceType
from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult from core.model_runtime.entities.text_embedding_entities import EmbeddingUsage, TextEmbeddingResult
from core.model_runtime.errors.validate import CredentialsValidateFailedError from core.model_runtime.errors.validate import CredentialsValidateFailedError

View File

@ -1,14 +1,14 @@
from typing import Optional from typing import Optional
from core.model_manager import ModelManager from core.model_manager import ModelInstance, ModelManager
from core.model_runtime.entities.model_entities import ModelType from core.model_runtime.entities.model_entities import ModelType
from core.model_runtime.errors.invoke import InvokeAuthorizationError from core.model_runtime.errors.invoke import InvokeAuthorizationError
from core.rag.data_post_processor.reorder import ReorderRunner from core.rag.data_post_processor.reorder import ReorderRunner
from core.rag.models.document import Document from core.rag.models.document import Document
from core.rag.rerank.constants.rerank_mode import RerankMode
from core.rag.rerank.entity.weight import KeywordSetting, VectorSetting, Weights from core.rag.rerank.entity.weight import KeywordSetting, VectorSetting, Weights
from core.rag.rerank.rerank_model import RerankModelRunner from core.rag.rerank.rerank_base import BaseRerankRunner
from core.rag.rerank.weight_rerank import WeightRerankRunner from core.rag.rerank.rerank_factory import RerankRunnerFactory
from core.rag.rerank.rerank_type import RerankMode
class DataPostProcessor: class DataPostProcessor:
@ -47,11 +47,12 @@ class DataPostProcessor:
tenant_id: str, tenant_id: str,
reranking_model: Optional[dict] = None, reranking_model: Optional[dict] = None,
weights: Optional[dict] = None, weights: Optional[dict] = None,
) -> Optional[RerankModelRunner | WeightRerankRunner]: ) -> Optional[BaseRerankRunner]:
if reranking_mode == RerankMode.WEIGHTED_SCORE.value and weights: if reranking_mode == RerankMode.WEIGHTED_SCORE.value and weights:
return WeightRerankRunner( runner = RerankRunnerFactory.create_rerank_runner(
tenant_id, runner_type=reranking_mode,
Weights( tenant_id=tenant_id,
weights=Weights(
vector_setting=VectorSetting( vector_setting=VectorSetting(
vector_weight=weights["vector_setting"]["vector_weight"], vector_weight=weights["vector_setting"]["vector_weight"],
embedding_provider_name=weights["vector_setting"]["embedding_provider_name"], embedding_provider_name=weights["vector_setting"]["embedding_provider_name"],
@ -62,23 +63,33 @@ class DataPostProcessor:
), ),
), ),
) )
return runner
elif reranking_mode == RerankMode.RERANKING_MODEL.value: elif reranking_mode == RerankMode.RERANKING_MODEL.value:
if reranking_model: rerank_model_instance = self._get_rerank_model_instance(tenant_id, reranking_model)
try: if rerank_model_instance is None:
model_manager = ModelManager() return None
rerank_model_instance = model_manager.get_model_instance( runner = RerankRunnerFactory.create_rerank_runner(
tenant_id=tenant_id, runner_type=reranking_mode, rerank_model_instance=rerank_model_instance
provider=reranking_model["reranking_provider_name"], )
model_type=ModelType.RERANK, return runner
model=reranking_model["reranking_model_name"],
)
except InvokeAuthorizationError:
return None
return RerankModelRunner(rerank_model_instance)
return None
return None return None
def _get_reorder_runner(self, reorder_enabled) -> Optional[ReorderRunner]: def _get_reorder_runner(self, reorder_enabled) -> Optional[ReorderRunner]:
if reorder_enabled: if reorder_enabled:
return ReorderRunner() return ReorderRunner()
return None return None
def _get_rerank_model_instance(self, tenant_id: str, reranking_model: Optional[dict]) -> ModelInstance | None:
if reranking_model:
try:
model_manager = ModelManager()
rerank_model_instance = model_manager.get_model_instance(
tenant_id=tenant_id,
provider=reranking_model["reranking_provider_name"],
model_type=ModelType.RERANK,
model=reranking_model["reranking_model_name"],
)
return rerank_model_instance
except InvokeAuthorizationError:
return None
return None

View File

@ -6,7 +6,7 @@ from flask import Flask, current_app
from core.rag.data_post_processor.data_post_processor import DataPostProcessor from core.rag.data_post_processor.data_post_processor import DataPostProcessor
from core.rag.datasource.keyword.keyword_factory import Keyword from core.rag.datasource.keyword.keyword_factory import Keyword
from core.rag.datasource.vdb.vector_factory import Vector from core.rag.datasource.vdb.vector_factory import Vector
from core.rag.rerank.constants.rerank_mode import RerankMode from core.rag.rerank.rerank_type import RerankMode
from core.rag.retrieval.retrieval_methods import RetrievalMethod from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db from extensions.ext_database import db
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -9,10 +9,10 @@ _import_err_msg = (
) )
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -12,10 +12,10 @@ from pymochow.model.schema import Field, HNSWParams, Schema, VectorIndex
from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row from pymochow.model.table import AnnSearch, HNSWSearchParams, Partition, Row
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -6,10 +6,10 @@ from chromadb import QueryResult, Settings
from pydantic import BaseModel from pydantic import BaseModel
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -9,11 +9,11 @@ from elasticsearch import Elasticsearch
from flask import current_app from flask import current_app
from pydantic import BaseModel, model_validator from pydantic import BaseModel, model_validator
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.field import Field from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -7,11 +7,11 @@ from pymilvus import MilvusClient, MilvusException
from pymilvus.milvus_client import IndexParams from pymilvus.milvus_client import IndexParams
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.field import Field from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -8,10 +8,10 @@ from clickhouse_connect import get_client
from pydantic import BaseModel from pydantic import BaseModel
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from models.dataset import Dataset from models.dataset import Dataset

View File

@ -9,11 +9,11 @@ from opensearchpy.helpers import BulkIndexError
from pydantic import BaseModel, model_validator from pydantic import BaseModel, model_validator
from configs import dify_config from configs import dify_config
from core.rag.datasource.entity.embedding import Embeddings
from core.rag.datasource.vdb.field import Field from core.rag.datasource.vdb.field import Field
from core.rag.datasource.vdb.vector_base import BaseVector from core.rag.datasource.vdb.vector_base import BaseVector
from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory from core.rag.datasource.vdb.vector_factory import AbstractVectorFactory
from core.rag.datasource.vdb.vector_type import VectorType from core.rag.datasource.vdb.vector_type import VectorType
from core.rag.embedding.embedding_base import Embeddings
from core.rag.models.document import Document from core.rag.models.document import Document
from extensions.ext_redis import redis_client from extensions.ext_redis import redis_client
from models.dataset import Dataset from models.dataset import Dataset

Some files were not shown because too many files have changed in this diff Show More