diff --git a/api/controllers/console/app/completion.py b/api/controllers/console/app/completion.py
index 6fe52ec28a..53de51c24d 100644
--- a/api/controllers/console/app/completion.py
+++ b/api/controllers/console/app/completion.py
@@ -17,6 +17,7 @@ from controllers.console.app.error import (
 from controllers.console.app.wraps import get_app_model
 from controllers.console.setup import setup_required
 from controllers.console.wraps import account_initialization_required
+from controllers.console.app.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.errors.error import (
@@ -31,6 +32,7 @@ from libs.helper import uuid_value
 from libs.login import login_required
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
+from services.errors.llm import InvokeRateLimitError
 
 
 # define completion message api for user
@@ -135,6 +137,8 @@ class ChatMessageApi(Resource):
             raise ProviderQuotaExceededError()
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
+        except InvokeRateLimitError as ex:
+            raise InvokeRateLimitHttpError(ex.description)
         except InvokeError as e:
             raise CompletionRequestError(e.description)
         except (ValueError, AppInvokeQuotaExceededError) as e:
diff --git a/api/controllers/console/app/error.py b/api/controllers/console/app/error.py
index 33d30c2051..1559f82d6e 100644
--- a/api/controllers/console/app/error.py
+++ b/api/controllers/console/app/error.py
@@ -119,3 +119,11 @@ class TracingConfigCheckError(BaseHTTPException):
     error_code = "trace_config_check_error"
     description = "Invalid Credentials."
     code = 400
+
+
+class InvokeRateLimitError(BaseHTTPException):
+    """Raised when the Invoke returns rate limit error."""
+
+    error_code = "rate_limit_error"
+    description = "Rate Limit Error"
+    code = 429
diff --git a/api/controllers/web/completion.py b/api/controllers/web/completion.py
index bd636a0485..0837eedfb0 100644
--- a/api/controllers/web/completion.py
+++ b/api/controllers/web/completion.py
@@ -15,6 +15,7 @@ from controllers.web.error import (
     ProviderNotInitializeError,
     ProviderQuotaExceededError,
 )
+from controllers.web.error import InvokeRateLimitError as InvokeRateLimitHttpError
 from controllers.web.wraps import WebApiResource
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import InvokeFrom
@@ -24,6 +25,7 @@ from libs import helper
 from libs.helper import uuid_value
 from models.model import AppMode
 from services.app_generate_service import AppGenerateService
+from services.errors.llm import InvokeRateLimitError
 
 
 # define completion api for user
@@ -120,6 +122,8 @@ class ChatApi(WebApiResource):
             raise ProviderQuotaExceededError()
         except ModelCurrentlyNotSupportError:
             raise ProviderModelCurrentlyNotSupportError()
+        except InvokeRateLimitError as ex:
+            raise InvokeRateLimitHttpError(ex.description)
         except InvokeError as e:
             raise CompletionRequestError(e.description)
         except ValueError as e:
diff --git a/api/controllers/web/error.py b/api/controllers/web/error.py
index 2f6bb39cf2..9fe5d08d54 100644
--- a/api/controllers/web/error.py
+++ b/api/controllers/web/error.py
@@ -125,3 +125,11 @@ class WebSSOAuthRequiredError(BaseHTTPException):
     error_code = "web_sso_auth_required"
     description = "Web SSO authentication required."
     code = 401
+
+
+class InvokeRateLimitError(BaseHTTPException):
+    """Raised when the Invoke returns rate limit error."""
+
+    error_code = "rate_limit_error"
+    description = "Rate Limit Error"
+    code = 429
diff --git a/api/core/errors/error.py b/api/core/errors/error.py
index 859a747c12..53323a2eeb 100644
--- a/api/core/errors/error.py
+++ b/api/core/errors/error.py
@@ -43,3 +43,8 @@ class ModelCurrentlyNotSupportError(Exception):
     Custom exception raised when the model not support
     """
     description = "Model Currently Not Support"
+
+
+class InvokeRateLimitError(Exception):
+    """Raised when the Invoke returns rate limit error."""
+    description = "Rate Limit Error"
diff --git a/api/services/app_generate_service.py b/api/services/app_generate_service.py
index 34fce4630e..747505977f 100644
--- a/api/services/app_generate_service.py
+++ b/api/services/app_generate_service.py
@@ -1,6 +1,8 @@
 from collections.abc import Generator
 from typing import Any, Union
 
+from openai import RateLimitError
+
 from configs import dify_config
 from core.app.apps.advanced_chat.app_generator import AdvancedChatAppGenerator
 from core.app.apps.agent_chat.app_generator import AgentChatAppGenerator
@@ -10,6 +12,7 @@ from core.app.apps.workflow.app_generator import WorkflowAppGenerator
 from core.app.entities.app_invoke_entities import InvokeFrom
 from core.app.features.rate_limiting import RateLimit
 from models.model import Account, App, AppMode, EndUser
+from services.errors.llm import InvokeRateLimitError
 from services.workflow_service import WorkflowService
 
 
@@ -86,6 +89,9 @@ class AppGenerateService:
                 )
             else:
                 raise ValueError(f"Invalid app mode {app_model.mode}")
+        except RateLimitError as e:
+            # Re-raise as the service-level error; "from e" keeps the SDK error as __cause__.
+            raise InvokeRateLimitError(str(e)) from e
         finally:
             if not streaming:
                 rate_limit.exit(request_id)
diff --git a/api/services/errors/llm.py b/api/services/errors/llm.py
new file mode 100644
index 0000000000..e4fac6f745
--- /dev/null
+++ b/api/services/errors/llm.py
@@ -0,0 +1,26 @@
+from typing import Optional
+
+
+class InvokeError(Exception):
+    """Base class for all LLM exceptions.
+
+    Subclasses may set ``description`` at class level as a default message;
+    a ``description`` passed to the constructor overrides that default.
+    """
+
+    description: Optional[str] = None
+
+    def __init__(self, description: Optional[str] = None) -> None:
+        # Assign only when a message is supplied, so the class-level default
+        # (e.g. "Rate Limit Error") is not shadowed by None on the instance.
+        if description is not None:
+            self.description = description
+
+    def __str__(self) -> str:
+        return self.description or self.__class__.__name__
+
+
+class InvokeRateLimitError(InvokeError):
+    """Raised when the Invoke returns rate limit error."""
+
+    description = "Rate Limit Error"