dify/api/controllers/service_api/app/audio.py

import logging

from flask import request
from flask_restful import Resource, reqparse
from werkzeug.exceptions import InternalServerError

import services
from controllers.service_api import api
from controllers.service_api.app.error import (
    AppUnavailableError,
    AudioTooLargeError,
    CompletionRequestError,
    NoAudioUploadedError,
    ProviderModelCurrentlyNotSupportError,
    ProviderNotInitializeError,
    ProviderNotSupportSpeechToTextError,
    ProviderQuotaExceededError,
    UnsupportedAudioTypeError,
)
from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
from core.model_runtime.errors.invoke import InvokeError
from models.model import App, EndUser
from services.audio_service import AudioService
from services.errors.audio import (
    AudioTooLargeServiceError,
    NoAudioUploadedServiceError,
    ProviderNotSupportSpeechToTextServiceError,
    UnsupportedAudioTypeServiceError,
)


class AudioApi(Resource):
    """Service API resource that converts an uploaded audio file to text."""

    @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))
    def post(self, app_model: App, end_user: EndUser):
        """Transcribe the multipart ``file`` upload via ``AudioService.transcript_asr``.

        :param app_model: the app the request is made against
        :param end_user: the end user the request is made on behalf of
        :return: whatever ``AudioService.transcript_asr`` returns, unmodified
        :raises NoAudioUploadedError: no ``file`` part was sent, or the service
            reported that no audio was uploaded
        :raises AppUnavailableError: the app model config is broken
        :raises AudioTooLargeError: the service rejected the audio as too large
        :raises UnsupportedAudioTypeError: the service rejected the audio type
        :raises ProviderNotSupportSpeechToTextError: provider has no speech-to-text support
        :raises ProviderNotInitializeError: provider token is not initialised
        :raises ProviderQuotaExceededError: provider quota is exhausted
        :raises ProviderModelCurrentlyNotSupportError: model is currently not supported
        :raises CompletionRequestError: the model invocation failed
        :raises ValueError: propagated unchanged from the service
        :raises InternalServerError: any other unexpected failure
        """
        # ``request.files['file']`` raises werkzeug's generic BadRequestKeyError
        # when the part is missing; look it up with .get() so the caller gets
        # the dedicated "no audio uploaded" API error instead.
        file = request.files.get('file')
        if file is None:
            raise NoAudioUploadedError()

        try:
            return AudioService.transcript_asr(
                app_model=app_model,
                file=file,
                end_user=end_user
            )
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        except NoAudioUploadedServiceError:
            raise NoAudioUploadedError()
        except AudioTooLargeServiceError as e:
            # Forward the service's message (it carries the size details).
            raise AudioTooLargeError(str(e))
        except UnsupportedAudioTypeServiceError:
            raise UnsupportedAudioTypeError()
        except ProviderNotSupportSpeechToTextServiceError:
            raise ProviderNotSupportSpeechToTextError()
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except ValueError:
            # Let ValueError through untouched so the catch-all below does not
            # mask it as an internal server error.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()


class TextApi(Resource):
    """Service API resource that converts text to audio."""

    @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
    def post(self, app_model: App, end_user: EndUser):
        """Synthesize audio for the JSON ``text`` field via ``AudioService.transcript_tts``.

        JSON body fields:
            ``text`` (str, required, non-null), ``voice`` (str, optional),
            ``streaming`` (bool, optional, non-null when present).

        :param app_model: the app the request is made against
        :param end_user: the end user the request is made on behalf of
        :return: whatever ``AudioService.transcript_tts`` returns, unmodified
        :raises AppUnavailableError: the app model config is broken
        :raises ProviderNotInitializeError: provider token is not initialised
        :raises ProviderQuotaExceededError: provider quota is exhausted
        :raises ProviderModelCurrentlyNotSupportError: model is currently not supported
        :raises CompletionRequestError: the model invocation failed
        :raises ValueError: propagated unchanged from the service
        :raises InternalServerError: any other unexpected failure
        """
        parser = reqparse.RequestParser()
        parser.add_argument('text', type=str, required=True, nullable=False, location='json')
        parser.add_argument('voice', type=str, location='json')
        # NOTE(review): with type=bool the parser calls bool() on the supplied
        # value, so a JSON *string* such as "false" would come out as True; only
        # real JSON booleans behave as expected. Consider
        # flask_restful.inputs.boolean if string inputs must be supported.
        parser.add_argument('streaming', type=bool, required=False, nullable=False, location='json')
        args = parser.parse_args()

        try:
            return AudioService.transcript_tts(
                app_model=app_model,
                text=args['text'],
                end_user=end_user,
                voice=args.get('voice'),
                streaming=args['streaming']
            )
        except services.errors.app_model_config.AppModelConfigBrokenError:
            logging.exception("App model config broken.")
            raise AppUnavailableError()
        # The audio-upload mappings below mirror AudioApi's handler chain;
        # presumably they are not raised by text-to-speech (verify against
        # AudioService.transcript_tts).
        except NoAudioUploadedServiceError:
            raise NoAudioUploadedError()
        except AudioTooLargeServiceError as e:
            raise AudioTooLargeError(str(e))
        except UnsupportedAudioTypeServiceError:
            raise UnsupportedAudioTypeError()
        except ProviderNotSupportSpeechToTextServiceError:
            raise ProviderNotSupportSpeechToTextError()
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        except QuotaExceededError:
            raise ProviderQuotaExceededError()
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()
        except InvokeError as e:
            raise CompletionRequestError(e.description)
        except ValueError:
            # Let ValueError through untouched so the catch-all below does not
            # mask it as an internal server error.
            raise
        except Exception:
            logging.exception("internal server error.")
            raise InternalServerError()


# Register both audio resources on the service API under their URL paths.
api.add_resource(AudioApi, '/audio-to-text')  # speech-to-text (AudioService.transcript_asr)
api.add_resource(TextApi, '/text-to-audio')  # text-to-speech (AudioService.transcript_tts)
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`import logging`

enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from flask import request`
fix: missing default user for APP service api (#2606) 2024-02-28 16:09:56 +08:00			`from flask_restful import Resource, reqparse`
enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from werkzeug.exceptions import InternalServerError`

Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`import services`
			`from controllers.service_api import api`
enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from controllers.service_api.app.error import (`
			`AppUnavailableError,`
			`AudioTooLargeError,`
			`CompletionRequestError,`
			`NoAudioUploadedError,`
			`ProviderModelCurrentlyNotSupportError,`
			`ProviderNotInitializeError,`
			`ProviderNotSupportSpeechToTextError,`
			`ProviderQuotaExceededError,`
			`UnsupportedAudioTypeError,`
			`)`
fix: missing default user for APP service api (#2606) 2024-02-28 16:09:56 +08:00			`from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token`
improve: introduce isort for linting Python imports (#1983) 2024-01-12 12:34:01 +08:00			`from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError`
Model Runtime (#1858) Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> 2024-01-02 23:42:00 +08:00			`from core.model_runtime.errors.invoke import InvokeError`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`from models.model import App, EndUser`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`from services.audio_service import AudioService`
enhancement: introduce Ruff for Python linter for reordering and removing unused imports with automated pre-commit and sytle check (#2366) 2024-02-06 13:21:13 +08:00			`from services.errors.audio import (`
			`AudioTooLargeServiceError,`
			`NoAudioUploadedServiceError,`
			`ProviderNotSupportSpeechToTextServiceError,`
			`UnsupportedAudioTypeServiceError,`
			`)`
improve: introduce isort for linting Python imports (#1983) 2024-01-12 12:34:01 +08:00
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00
fix: missing default user for APP service api (#2606) 2024-02-28 16:09:56 +08:00			`class AudioApi(Resource):`
			`@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.FORM))`
			`def post(self, app_model: App, end_user: EndUser):`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`file = request.files['file']`

			`try:`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`response = AudioService.transcript_asr(`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`app_model=app_model,`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`file=file,`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`end_user=end_user`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`)`

			`return response`
			`except services.errors.app_model_config.AppModelConfigBrokenError:`
			`logging.exception("App model config broken.")`
			`raise AppUnavailableError()`
			`except NoAudioUploadedServiceError:`
			`raise NoAudioUploadedError()`
			`except AudioTooLargeServiceError as e:`
			`raise AudioTooLargeError(str(e))`
			`except UnsupportedAudioTypeServiceError:`
			`raise UnsupportedAudioTypeError()`
			`except ProviderNotSupportSpeechToTextServiceError:`
			`raise ProviderNotSupportSpeechToTextError()`
			`except ProviderTokenNotInitError as ex:`
			`raise ProviderNotInitializeError(ex.description)`
			`except QuotaExceededError:`
			`raise ProviderQuotaExceededError()`
			`except ModelCurrentlyNotSupportError:`
			`raise ProviderModelCurrentlyNotSupportError()`
			`except InvokeError as e:`
			`raise CompletionRequestError(e.description)`
			`except ValueError as e:`
			`raise e`
			`except Exception as e:`
			`logging.exception("internal server error.")`
			`raise InternalServerError()`


fix: missing default user for APP service api (#2606) 2024-02-28 16:09:56 +08:00			`class TextApi(Resource):`
Fix tts api err (#2809) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> 2024-03-13 15:38:10 +08:00			`@validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))`
fix: missing default user for APP service api (#2606) 2024-02-28 16:09:56 +08:00			`def post(self, app_model: App, end_user: EndUser):`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`parser = reqparse.RequestParser()`
			`parser.add_argument('text', type=str, required=True, nullable=False, location='json')`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`parser.add_argument('voice', type=str, location='json')`
Add tts document&fix bug (#2156) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 23:04:14 +08:00			`parser.add_argument('streaming', type=bool, required=False, nullable=False, location='json')`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`args = parser.parse_args()`

			`try:`
			`response = AudioService.transcript_tts(`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`app_model=app_model,`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00			`text=args['text'],`
FEAT: NEW WORKFLOW ENGINE (#3160) Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> Co-authored-by: JzoNg <jzongcode@gmail.com> Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: jyong <718720800@qq.com> 2024-04-08 18:51:46 +08:00			`end_user=end_user,`
			`voice=args.get('voice'),`
Add tts document&fix bug (#2156) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 23:04:14 +08:00			`streaming=args['streaming']`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`)`

			`return response`
			`except services.errors.app_model_config.AppModelConfigBrokenError:`
			`logging.exception("App model config broken.")`
			`raise AppUnavailableError()`
			`except NoAudioUploadedServiceError:`
			`raise NoAudioUploadedError()`
			`except AudioTooLargeServiceError as e:`
			`raise AudioTooLargeError(str(e))`
			`except UnsupportedAudioTypeServiceError:`
			`raise UnsupportedAudioTypeError()`
			`except ProviderNotSupportSpeechToTextServiceError:`
			`raise ProviderNotSupportSpeechToTextError()`
feat: claude api support (#572) 2023-07-17 00:14:19 +08:00			`except ProviderTokenNotInitError as ex:`
			`raise ProviderNotInitializeError(ex.description)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`except QuotaExceededError:`
			`raise ProviderQuotaExceededError()`
			`except ModelCurrentlyNotSupportError:`
			`raise ProviderModelCurrentlyNotSupportError()`
Model Runtime (#1858) Co-authored-by: StyleZhang <jasonapring2015@outlook.com> Co-authored-by: Garfield Dai <dai.hai@foxmail.com> Co-authored-by: chenhe <guchenhe@gmail.com> Co-authored-by: jyong <jyong@dify.ai> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Yeuoly <admin@srmxy.cn> 2024-01-02 23:42:00 +08:00			`except InvokeError as e:`
feat: optimize invoke errors (#1922) 2024-01-04 17:49:55 +08:00			`raise CompletionRequestError(e.description)`
Feat/chat support voice input (#532) 2023-07-07 17:50:42 +08:00			`except ValueError as e:`
			`raise e`
			`except Exception as e:`
			`logging.exception("internal server error.")`
			`raise InternalServerError()`
tts models support (#2033) Co-authored-by: luowei <glpat-EjySCyNjWiLqAED-YmwM> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: Yeuoly <45712896+Yeuoly@users.noreply.github.com> 2024-01-24 01:05:37 +08:00

			`api.add_resource(AudioApi, '/audio-to-text')`
			`api.add_resource(TextApi, '/text-to-audio')`