feat: add tts-streaming config and feature (#5492)

parent b29a36f461
commit 6ef401a9f0
api/constants/tts_auto_play_timeout.py (new file, +4 lines)
@@ -0,0 +1,4 @@
+TTS_AUTO_PLAY_TIMEOUT = 5
+
+# sleep 20 ms (40ms => 1280 byte audio file, 20ms => 640 byte audio file)
+TTS_AUTO_PLAY_YIELD_CPU_TIME = 0.02
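These two constants drive the audio-drain loops added to the three task pipelines below: a consumer polls the TTS publisher for ready audio, sleeps TTS_AUTO_PLAY_YIELD_CPU_TIME between empty polls, and gives up after TTS_AUTO_PLAY_TIMEOUT seconds without a chunk. A minimal sketch of that loop, with a plain queue standing in for the real publisher (everything except the two constants is illustrative):

import queue
import time

from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME


def drain_audio(audio_queue: queue.Queue):
    """Yield audio chunks until a 'finish' marker, or until the queue stays empty for TTS_AUTO_PLAY_TIMEOUT seconds."""
    last_activity = time.time()
    while (time.time() - last_activity) < TTS_AUTO_PLAY_TIMEOUT:
        try:
            chunk = audio_queue.get_nowait()
        except queue.Empty:
            time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME)  # release the CPU between polls
            continue
        if chunk == 'finish':
            break
        last_activity = time.time()  # fresh audio resets the timeout window
        yield chunk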
@@ -81,15 +81,36 @@ class ChatMessageTextApi(Resource):
     @account_initialization_required
     @get_app_model
     def post(self, app_model):
+        from werkzeug.exceptions import InternalServerError
+
         try:
+            parser = reqparse.RequestParser()
+            parser.add_argument('message_id', type=str, location='json')
+            parser.add_argument('text', type=str, location='json')
+            parser.add_argument('voice', type=str, location='json')
+            parser.add_argument('streaming', type=bool, location='json')
+            args = parser.parse_args()
+
+            message_id = args.get('message_id', None)
+            text = args.get('text', None)
+            if (app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]
+                    and app_model.workflow
+                    and app_model.workflow.features_dict):
+                text_to_speech = app_model.workflow.features_dict.get('text_to_speech')
+                voice = args.get('voice') if args.get('voice') else text_to_speech.get('voice')
+            else:
+                try:
+                    voice = args.get('voice') if args.get('voice') else app_model.app_model_config.text_to_speech_dict.get(
+                        'voice')
+                except Exception:
+                    voice = None
             response = AudioService.transcript_tts(
                 app_model=app_model,
-                text=request.form['text'],
-                voice=request.form['voice'],
-                streaming=False
+                text=text,
+                message_id=message_id,
+                voice=voice
             )
-            return {'data': response.data.decode('latin1')}
+            return response
         except services.errors.app_model_config.AppModelConfigBrokenError:
             logging.exception("App model config broken.")
             raise AppUnavailableError()
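The voice-resolution branch above reappears almost verbatim in the explore, service-api and web controllers that follow. A sketch of how it could be hoisted into one shared helper; resolve_tts_voice is a hypothetical name, not part of this commit:

from typing import Optional

from models.model import AppMode


def resolve_tts_voice(app_model, requested_voice: Optional[str]) -> Optional[str]:
    # Prefer the caller-supplied voice, then the app's configured TTS voice.
    if requested_voice:
        return requested_voice
    if (app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]
            and app_model.workflow
            and app_model.workflow.features_dict):
        text_to_speech = app_model.workflow.features_dict.get('text_to_speech') or {}
        return text_to_speech.get('voice')
    try:
        return app_model.app_model_config.text_to_speech_dict.get('voice')
    except Exception:
        return None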
@@ -19,6 +19,7 @@ from controllers.console.app.error import (
 from controllers.console.explore.wraps import InstalledAppResource
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
+from models.model import AppMode
 from services.audio_service import AudioService
 from services.errors.audio import (
     AudioTooLargeServiceError,

@@ -70,16 +71,33 @@ class ChatAudioApi(InstalledAppResource):
 
 class ChatTextApi(InstalledAppResource):
     def post(self, installed_app):
-        app_model = installed_app.app
+        from flask_restful import reqparse
+
+        app_model = installed_app.app
         try:
+            parser = reqparse.RequestParser()
+            parser.add_argument('message_id', type=str, required=False, location='json')
+            parser.add_argument('voice', type=str, location='json')
+            parser.add_argument('streaming', type=bool, location='json')
+            args = parser.parse_args()
+
+            message_id = args.get('message_id')
+            if (app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]
+                    and app_model.workflow
+                    and app_model.workflow.features_dict):
+                text_to_speech = app_model.workflow.features_dict.get('text_to_speech')
+                voice = args.get('voice') if args.get('voice') else text_to_speech.get('voice')
+            else:
+                try:
+                    voice = args.get('voice') if args.get('voice') else app_model.app_model_config.text_to_speech_dict.get('voice')
+                except Exception:
+                    voice = None
             response = AudioService.transcript_tts(
                 app_model=app_model,
-                text=request.form['text'],
-                voice=request.form['voice'] if request.form.get('voice') else app_model.app_model_config.text_to_speech_dict.get('voice'),
-                streaming=False
+                message_id=message_id,
+                voice=voice
             )
-            return {'data': response.data.decode('latin1')}
+            return response
         except services.errors.app_model_config.AppModelConfigBrokenError:
             logging.exception("App model config broken.")
             raise AppUnavailableError()

@@ -108,3 +126,5 @@ class ChatTextApi(InstalledAppResource):
 
 api.add_resource(ChatAudioApi, '/installed-apps/<uuid:installed_app_id>/audio-to-text', endpoint='installed_app_audio')
 api.add_resource(ChatTextApi, '/installed-apps/<uuid:installed_app_id>/text-to-audio', endpoint='installed_app_text')
+# api.add_resource(ChatTextApiWithMessageId, '/installed-apps/<uuid:installed_app_id>/text-to-audio/message-id',
+#                  endpoint='installed_app_text_with_message_id')
@@ -20,7 +20,7 @@ from controllers.service_api.app.error import (
 from controllers.service_api.wraps import FetchUserArg, WhereisUserArg, validate_app_token
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
-from models.model import App, EndUser
+from models.model import App, AppMode, EndUser
 from services.audio_service import AudioService
 from services.errors.audio import (
     AudioTooLargeServiceError,

@@ -72,19 +72,30 @@ class AudioApi(Resource):
 class TextApi(Resource):
     @validate_app_token(fetch_user_arg=FetchUserArg(fetch_from=WhereisUserArg.JSON))
     def post(self, app_model: App, end_user: EndUser):
-        parser = reqparse.RequestParser()
-        parser.add_argument('text', type=str, required=True, nullable=False, location='json')
-        parser.add_argument('voice', type=str, location='json')
-        parser.add_argument('streaming', type=bool, required=False, nullable=False, location='json')
-        args = parser.parse_args()
-
         try:
+            parser = reqparse.RequestParser()
+            parser.add_argument('message_id', type=str, required=False, location='json')
+            parser.add_argument('voice', type=str, location='json')
+            parser.add_argument('streaming', type=bool, location='json')
+            args = parser.parse_args()
+
+            message_id = args.get('message_id')
+            if (app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]
+                    and app_model.workflow
+                    and app_model.workflow.features_dict):
+                text_to_speech = app_model.workflow.features_dict.get('text_to_speech')
+                voice = args.get('voice') if args.get('voice') else text_to_speech.get('voice')
+            else:
+                try:
+                    voice = args.get('voice') if args.get('voice') else app_model.app_model_config.text_to_speech_dict.get(
+                        'voice')
+                except Exception:
+                    voice = None
             response = AudioService.transcript_tts(
                 app_model=app_model,
-                text=args['text'],
-                end_user=end_user,
-                voice=args.get('voice'),
-                streaming=args['streaming']
+                message_id=message_id,
+                end_user=end_user.external_user_id,
+                voice=voice
             )
 
             return response
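With the new parser, service-api callers pass a message_id instead of raw text and receive the synthesized audio for that stored message. A sketch of a client call; the host, API key and message id are placeholders, and the response framing (raw audio vs. a stream) depends on the app's TTS settings:

import requests

resp = requests.post(
    'https://api.example.com/v1/text-to-audio',            # placeholder host
    headers={'Authorization': 'Bearer app-xxxxxxxxxxxx'},  # placeholder app token
    json={
        'message_id': '1c7e6d0a-0000-0000-0000-000000000000',  # placeholder id
        'voice': 'alloy',      # optional; falls back to the app's configured voice
        'streaming': True,
        'user': 'end-user-1',
    },
)
resp.raise_for_status()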
@@ -19,7 +19,7 @@ from controllers.web.error import (
 from controllers.web.wraps import WebApiResource
 from core.errors.error import ModelCurrentlyNotSupportError, ProviderTokenNotInitError, QuotaExceededError
 from core.model_runtime.errors.invoke import InvokeError
-from models.model import App
+from models.model import App, AppMode
 from services.audio_service import AudioService
 from services.errors.audio import (
     AudioTooLargeServiceError,

@@ -69,16 +69,35 @@ class AudioApi(WebApiResource):
 
 class TextApi(WebApiResource):
     def post(self, app_model: App, end_user):
+        from flask_restful import reqparse
         try:
+            parser = reqparse.RequestParser()
+            parser.add_argument('message_id', type=str, required=False, location='json')
+            parser.add_argument('voice', type=str, location='json')
+            parser.add_argument('streaming', type=bool, location='json')
+            args = parser.parse_args()
+
+            message_id = args.get('message_id')
+            if (app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]
+                    and app_model.workflow
+                    and app_model.workflow.features_dict):
+                text_to_speech = app_model.workflow.features_dict.get('text_to_speech')
+                voice = args.get('voice') if args.get('voice') else text_to_speech.get('voice')
+            else:
+                try:
+                    voice = args.get('voice') if args.get(
+                        'voice') else app_model.app_model_config.text_to_speech_dict.get('voice')
+                except Exception:
+                    voice = None
+
             response = AudioService.transcript_tts(
                 app_model=app_model,
-                text=request.form['text'],
+                message_id=message_id,
                 end_user=end_user.external_user_id,
-                voice=request.form['voice'] if request.form.get('voice') else None,
-                streaming=False
+                voice=voice
             )
 
-            return {'data': response.data.decode('latin1')}
+            return response
         except services.errors.app_model_config.AppModelConfigBrokenError:
             logging.exception("App model config broken.")
             raise AppUnavailableError()
api/core/app/apps/advanced_chat/app_generator_tts_publisher.py (new file, +135 lines)

@@ -0,0 +1,135 @@
+import base64
+import concurrent.futures
+import logging
+import queue
+import re
+import threading
+
+from core.app.entities.queue_entities import QueueAgentMessageEvent, QueueLLMChunkEvent, QueueTextChunkEvent
+from core.model_manager import ModelManager
+from core.model_runtime.entities.model_entities import ModelType
+
+
+class AudioTrunk:
+    def __init__(self, status: str, audio):
+        self.audio = audio
+        self.status = status
+
+
+def _invoiceTTS(text_content: str, model_instance, tenant_id: str, voice: str):
+    if not text_content or text_content.isspace():
+        return
+    return model_instance.invoke_tts(
+        content_text=text_content.strip(),
+        user="responding_tts",
+        tenant_id=tenant_id,
+        voice=voice
+    )
+
+
+def _process_future(future_queue, audio_queue):
+    while True:
+        try:
+            future = future_queue.get()
+            if future is None:
+                break
+            for audio in future.result():
+                audio_base64 = base64.b64encode(bytes(audio))
+                audio_queue.put(AudioTrunk("responding", audio=audio_base64))
+        except Exception as e:
+            logging.getLogger(__name__).warning(e)
+            break
+    audio_queue.put(AudioTrunk("finish", b''))
+
+
+class AppGeneratorTTSPublisher:
+
+    def __init__(self, tenant_id: str, voice: str):
+        self.logger = logging.getLogger(__name__)
+        self.tenant_id = tenant_id
+        self.msg_text = ''
+        self._audio_queue = queue.Queue()
+        self._msg_queue = queue.Queue()
+        self.match = re.compile(r'[。.!?]')
+        self.model_manager = ModelManager()
+        self.model_instance = self.model_manager.get_default_model_instance(
+            tenant_id=self.tenant_id,
+            model_type=ModelType.TTS
+        )
+        self.voices = self.model_instance.get_tts_voices()
+        values = [voice.get('value') for voice in self.voices]
+        self.voice = voice
+        if not voice or voice not in values:
+            self.voice = self.voices[0].get('value')
+        self.MAX_SENTENCE = 2
+        self._last_audio_event = None
+        self._runtime_thread = threading.Thread(target=self._runtime).start()
+        self.executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)
+
+    def publish(self, message):
+        try:
+            self._msg_queue.put(message)
+        except Exception as e:
+            self.logger.warning(e)
+
+    def _runtime(self):
+        future_queue = queue.Queue()
+        threading.Thread(target=_process_future, args=(future_queue, self._audio_queue)).start()
+        while True:
+            try:
+                message = self._msg_queue.get()
+                if message is None:
+                    if self.msg_text and len(self.msg_text.strip()) > 0:
+                        futures_result = self.executor.submit(_invoiceTTS, self.msg_text,
+                                                              self.model_instance, self.tenant_id, self.voice)
+                        future_queue.put(futures_result)
+                    break
+                elif isinstance(message.event, QueueAgentMessageEvent | QueueLLMChunkEvent):
+                    self.msg_text += message.event.chunk.delta.message.content
+                elif isinstance(message.event, QueueTextChunkEvent):
+                    self.msg_text += message.event.text
+                self.last_message = message
+                sentence_arr, text_tmp = self._extract_sentence(self.msg_text)
+                if len(sentence_arr) >= min(self.MAX_SENTENCE, 7):
+                    self.MAX_SENTENCE += 1
+                    text_content = ''.join(sentence_arr)
+                    futures_result = self.executor.submit(_invoiceTTS, text_content,
+                                                          self.model_instance,
+                                                          self.tenant_id,
+                                                          self.voice)
+                    future_queue.put(futures_result)
+                    if text_tmp:
+                        self.msg_text = text_tmp
+                    else:
+                        self.msg_text = ''
+            except Exception as e:
+                self.logger.warning(e)
+                break
+        future_queue.put(None)
+
+    def checkAndGetAudio(self) -> AudioTrunk | None:
+        try:
+            if self._last_audio_event and self._last_audio_event.status == "finish":
+                if self.executor:
+                    self.executor.shutdown(wait=False)
+                return self.last_message
+            audio = self._audio_queue.get_nowait()
+            if audio and audio.status == "finish":
+                self.executor.shutdown(wait=False)
+                self._runtime_thread = None
+            if audio:
+                self._last_audio_event = audio
+            return audio
+        except queue.Empty:
+            return None
+
+    def _extract_sentence(self, org_text):
+        tx = self.match.finditer(org_text)
+        start = 0
+        result = []
+        for i in tx:
+            end = i.regs[0][1]
+            result.append(org_text[start:end])
+            start = end
+        return result, org_text[start:]
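The publisher's central trick is sentence-level buffering: text deltas accumulate in msg_text, complete delimiter-terminated sentences (once the adaptive MAX_SENTENCE threshold is reached) are flushed to the TTS model on a worker thread, and the unterminated tail is carried into the next round. (Note: _invoiceTTS reads like a typo for _invokeTTS.) A standalone sketch of the _extract_sentence behavior; the sample strings are illustrative:

import re

match = re.compile(r'[。.!?]')  # same delimiter set as AppGeneratorTTSPublisher


def extract_sentence(org_text: str):
    start, result = 0, []
    for m in match.finditer(org_text):
        result.append(org_text[start:m.end()])
        start = m.end()
    return result, org_text[start:]


sentences, tail = extract_sentence('Hello. How are you? I am fi')
assert sentences == ['Hello.', ' How are you?']
assert tail == ' I am fi'  # incomplete sentence, buffered for the next chunk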
@@ -4,6 +4,8 @@ import time
 from collections.abc import Generator
 from typing import Any, Optional, Union, cast
 
+from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
+from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.entities.app_invoke_entities import (
     AdvancedChatAppGenerateEntity,

@@ -33,6 +35,8 @@ from core.app.entities.task_entities import (
     ChatbotAppStreamResponse,
     ChatflowStreamGenerateRoute,
     ErrorStreamResponse,
+    MessageAudioEndStreamResponse,
+    MessageAudioStreamResponse,
     MessageEndStreamResponse,
     StreamResponse,
 )

@@ -71,13 +75,13 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
     _iteration_nested_relations: dict[str, list[str]]
 
     def __init__(
             self, application_generate_entity: AdvancedChatAppGenerateEntity,
             workflow: Workflow,
             queue_manager: AppQueueManager,
             conversation: Conversation,
             message: Message,
             user: Union[Account, EndUser],
             stream: bool
     ) -> None:
         """
         Initialize AdvancedChatAppGenerateTaskPipeline.

@@ -129,7 +133,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
             self._application_generate_entity.query
         )
 
-        generator = self._process_stream_response(
+        generator = self._wrapper_process_stream_response(
             trace_manager=self._application_generate_entity.trace_manager
         )
         if self._stream:

@@ -138,7 +142,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
             return self._to_blocking_response(generator)
 
     def _to_blocking_response(self, generator: Generator[StreamResponse, None, None]) \
             -> ChatbotAppBlockingResponse:
         """
         Process blocking response.
         :return:

@@ -169,7 +173,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
         raise Exception('Queue listening stopped unexpectedly.')
 
     def _to_stream_response(self, generator: Generator[StreamResponse, None, None]) \
             -> Generator[ChatbotAppStreamResponse, None, None]:
         """
         To stream response.
         :return:

@@ -182,14 +186,68 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
                 stream_response=stream_response
             )
 
+    def _listenAudioMsg(self, publisher, task_id: str):
+        if not publisher:
+            return None
+        audio_msg: AudioTrunk = publisher.checkAndGetAudio()
+        if audio_msg and audio_msg.status != "finish":
+            return MessageAudioStreamResponse(audio=audio_msg.audio, task_id=task_id)
+        return None
+
+    def _wrapper_process_stream_response(self, trace_manager: Optional[TraceQueueManager] = None) -> \
+            Generator[StreamResponse, None, None]:
+
+        publisher = None
+        task_id = self._application_generate_entity.task_id
+        tenant_id = self._application_generate_entity.app_config.tenant_id
+        features_dict = self._workflow.features_dict
+
+        if features_dict.get('text_to_speech') and features_dict['text_to_speech'].get('enabled') and features_dict[
+                'text_to_speech'].get('autoPlay') == 'enabled':
+            publisher = AppGeneratorTTSPublisher(tenant_id, features_dict['text_to_speech'].get('voice'))
+        for response in self._process_stream_response(publisher=publisher, trace_manager=trace_manager):
+            while True:
+                audio_response = self._listenAudioMsg(publisher, task_id=task_id)
+                if audio_response:
+                    yield audio_response
+                else:
+                    break
+            yield response
+
+        start_listener_time = time.time()
+        # timeout
+        while (time.time() - start_listener_time) < TTS_AUTO_PLAY_TIMEOUT:
+            try:
+                if not publisher:
+                    break
+                audio_trunk = publisher.checkAndGetAudio()
+                if audio_trunk is None:
+                    # release cpu
+                    # sleep 20 ms (40ms => 1280 byte audio file, 20ms => 640 byte audio file)
+                    time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME)
+                    continue
+                if audio_trunk.status == "finish":
+                    break
+                else:
+                    start_listener_time = time.time()
+                    yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id)
+            except Exception as e:
+                logger.error(e)
+                break
+        yield MessageAudioEndStreamResponse(audio='', task_id=task_id)
+
     def _process_stream_response(
-            self, trace_manager: Optional[TraceQueueManager] = None
+            self,
+            publisher: AppGeneratorTTSPublisher,
+            trace_manager: Optional[TraceQueueManager] = None
     ) -> Generator[StreamResponse, None, None]:
         """
         Process stream response.
         :return:
         """
         for message in self._queue_manager.listen():
+            if publisher:
+                publisher.publish(message=message)
             event = message.event
 
             if isinstance(event, QueueErrorEvent):

@@ -301,7 +359,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
                 continue
 
             if not self._is_stream_out_support(
                 event=event
             ):
                 continue
 

@@ -318,7 +376,8 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
                 yield self._ping_stream_response()
             else:
                 continue
-
+        if publisher:
+            publisher.publish(None)
         if self._conversation_name_generate_thread:
             self._conversation_name_generate_thread.join()

@@ -402,7 +461,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
         return stream_generate_routes
 
     def _get_answer_start_at_node_ids(self, graph: dict, target_node_id: str) \
             -> list[str]:
         """
         Get answer start at node id.
         :param graph: graph

@@ -457,7 +516,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
                 start_node_id = target_node_id
                 start_node_ids.append(start_node_id)
             elif node_type == NodeType.START.value or \
                     node_iteration_id is not None and iteration_start_node_id == source_node.get('id'):
                 start_node_id = source_node_id
                 start_node_ids.append(start_node_id)
             else:

@@ -515,7 +574,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
 
         # all route chunks are generated
         if self._task_state.current_stream_generate_state.current_route_position == len(
                 self._task_state.current_stream_generate_state.generate_route
         ):
             self._task_state.current_stream_generate_state = None
 

@@ -525,7 +584,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
         :return:
         """
         if not self._task_state.current_stream_generate_state:
-            return None
+            return
 
         route_chunks = self._task_state.current_stream_generate_state.generate_route[
             self._task_state.current_stream_generate_state.current_route_position:]

@@ -573,7 +632,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
             # get route chunk node execution info
             route_chunk_node_execution_info = self._task_state.ran_node_execution_infos[route_chunk_node_id]
             if (route_chunk_node_execution_info.node_type == NodeType.LLM
                     and latest_node_execution_info.node_type == NodeType.LLM):
                 # only LLM support chunk stream output
                 self._task_state.current_stream_generate_state.current_route_position += 1
                 continue

@@ -643,7 +702,7 @@ class AdvancedChatAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
 
         # all route chunks are generated
         if self._task_state.current_stream_generate_state.current_route_position == len(
                 self._task_state.current_stream_generate_state.generate_route
         ):
             self._task_state.current_stream_generate_state = None
 
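The wrapper added above is the heart of auto-play: before each text event it flushes any audio the publisher already has ready, and after the text stream ends it keeps polling until the publisher reports "finish" or TTS_AUTO_PLAY_TIMEOUT passes in silence. Reduced to its essentials, with a queue standing in for the publisher (stand-in names throughout):

import queue
import time

TTS_AUTO_PLAY_TIMEOUT = 5           # values mirror tts_auto_play_timeout.py
TTS_AUTO_PLAY_YIELD_CPU_TIME = 0.02


def interleave(text_events, audio_queue: queue.Queue):
    def ready_audio():
        try:
            return audio_queue.get_nowait()
        except queue.Empty:
            return None

    for event in text_events:
        while (audio := ready_audio()) is not None:  # flush audio that is already ready
            yield ('tts_message', audio)
        yield ('message', event)

    deadline = time.time() + TTS_AUTO_PLAY_TIMEOUT   # then drain the tail
    while time.time() < deadline:
        audio = ready_audio()
        if audio is None:
            time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME)  # release the CPU
            continue
        if audio == 'finish':
            break
        deadline = time.time() + TTS_AUTO_PLAY_TIMEOUT  # activity extends the window
        yield ('tts_message', audio)
    yield ('tts_message_end', '')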
@@ -51,7 +51,6 @@ class AppQueueManager:
         listen_timeout = current_app.config.get("APP_MAX_EXECUTION_TIME")
         start_time = time.time()
         last_ping_time = 0
-
         while True:
             try:
                 message = self._q.get(timeout=1)
@@ -1,7 +1,10 @@
 import logging
+import time
 from collections.abc import Generator
 from typing import Any, Optional, Union
 
+from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
+from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk
 from core.app.apps.base_app_queue_manager import AppQueueManager
 from core.app.entities.app_invoke_entities import (
     InvokeFrom,

@@ -25,6 +28,8 @@ from core.app.entities.queue_entities import (
 )
 from core.app.entities.task_entities import (
     ErrorStreamResponse,
+    MessageAudioEndStreamResponse,
+    MessageAudioStreamResponse,
     StreamResponse,
     TextChunkStreamResponse,
     TextReplaceStreamResponse,

@@ -105,7 +110,7 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
         db.session.refresh(self._user)
         db.session.close()
 
-        generator = self._process_stream_response(
+        generator = self._wrapper_process_stream_response(
             trace_manager=self._application_generate_entity.trace_manager
         )
         if self._stream:

@@ -161,8 +166,58 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
             stream_response=stream_response
         )
 
+    def _listenAudioMsg(self, publisher, task_id: str):
+        if not publisher:
+            return None
+        audio_msg: AudioTrunk = publisher.checkAndGetAudio()
+        if audio_msg and audio_msg.status != "finish":
+            return MessageAudioStreamResponse(audio=audio_msg.audio, task_id=task_id)
+        return None
+
+    def _wrapper_process_stream_response(self, trace_manager: Optional[TraceQueueManager] = None) -> \
+            Generator[StreamResponse, None, None]:
+
+        publisher = None
+        task_id = self._application_generate_entity.task_id
+        tenant_id = self._application_generate_entity.app_config.tenant_id
+        features_dict = self._workflow.features_dict
+
+        if features_dict.get('text_to_speech') and features_dict['text_to_speech'].get('enabled') and features_dict[
+                'text_to_speech'].get('autoPlay') == 'enabled':
+            publisher = AppGeneratorTTSPublisher(tenant_id, features_dict['text_to_speech'].get('voice'))
+        for response in self._process_stream_response(publisher=publisher, trace_manager=trace_manager):
+            while True:
+                audio_response = self._listenAudioMsg(publisher, task_id=task_id)
+                if audio_response:
+                    yield audio_response
+                else:
+                    break
+            yield response
+
+        start_listener_time = time.time()
+        while (time.time() - start_listener_time) < TTS_AUTO_PLAY_TIMEOUT:
+            try:
+                if not publisher:
+                    break
+                audio_trunk = publisher.checkAndGetAudio()
+                if audio_trunk is None:
+                    # release cpu
+                    # sleep 20 ms (40ms => 1280 byte audio file, 20ms => 640 byte audio file)
+                    time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME)
+                    continue
+                if audio_trunk.status == "finish":
+                    break
+                else:
+                    yield MessageAudioStreamResponse(audio=audio_trunk.audio, task_id=task_id)
+            except Exception as e:
+                logger.error(e)
+                break
+        yield MessageAudioEndStreamResponse(audio='', task_id=task_id)
+
     def _process_stream_response(
         self,
+        publisher: AppGeneratorTTSPublisher,
         trace_manager: Optional[TraceQueueManager] = None
     ) -> Generator[StreamResponse, None, None]:
         """

@@ -170,6 +225,8 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
         :return:
         """
         for message in self._queue_manager.listen():
+            if publisher:
+                publisher.publish(message=message)
             event = message.event
 
             if isinstance(event, QueueErrorEvent):

@@ -251,6 +308,10 @@ class WorkflowAppGenerateTaskPipeline(BasedGenerateTaskPipeline, WorkflowCycleManage):
             else:
                 continue
 
+        if publisher:
+            publisher.publish(None)
+
+
     def _save_workflow_app_log(self, workflow_run: WorkflowRun) -> None:
         """
         Save workflow app log.
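All three pipelines gate auto-play on the same configuration shape: text_to_speech must be enabled and autoPlay must equal the string 'enabled' (a string flag, not a boolean). A sketch of the shape being tested, with illustrative values:

features_dict = {
    'text_to_speech': {
        'enabled': True,
        'autoPlay': 'enabled',   # string flag, compared with == 'enabled'
        'voice': 'alloy',        # optional; the publisher falls back to the model's first voice
    }
}

tts = features_dict.get('text_to_speech') or {}
auto_play = bool(tts.get('enabled')) and tts.get('autoPlay') == 'enabled'
assert auto_play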
@@ -69,6 +69,7 @@ class WorkflowTaskState(TaskState):
 
     iteration_nested_node_ids: list[str] = None
 
+
 class AdvancedChatTaskState(WorkflowTaskState):
     """
     AdvancedChatTaskState entity

@@ -86,6 +87,8 @@ class StreamEvent(Enum):
     ERROR = "error"
     MESSAGE = "message"
     MESSAGE_END = "message_end"
+    TTS_MESSAGE = "tts_message"
+    TTS_MESSAGE_END = "tts_message_end"
     MESSAGE_FILE = "message_file"
     MESSAGE_REPLACE = "message_replace"
     AGENT_THOUGHT = "agent_thought"

@@ -130,6 +133,22 @@ class MessageStreamResponse(StreamResponse):
     answer: str
 
 
+class MessageAudioStreamResponse(StreamResponse):
+    """
+    MessageAudioStreamResponse entity
+    """
+    event: StreamEvent = StreamEvent.TTS_MESSAGE
+    audio: str
+
+
+class MessageAudioEndStreamResponse(StreamResponse):
+    """
+    MessageAudioEndStreamResponse entity
+    """
+    event: StreamEvent = StreamEvent.TTS_MESSAGE_END
+    audio: str
+
+
 class MessageEndStreamResponse(StreamResponse):
     """
     MessageEndStreamResponse entity

@@ -186,6 +205,7 @@ class WorkflowStartStreamResponse(StreamResponse):
     """
     WorkflowStartStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -205,6 +225,7 @@ class WorkflowFinishStreamResponse(StreamResponse):
     """
     WorkflowFinishStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -232,6 +253,7 @@ class NodeStartStreamResponse(StreamResponse):
     """
     NodeStartStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -273,6 +295,7 @@ class NodeFinishStreamResponse(StreamResponse):
     """
     NodeFinishStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -323,10 +346,12 @@ class NodeFinishStreamResponse(StreamResponse):
             }
         }
 
+
 class IterationNodeStartStreamResponse(StreamResponse):
     """
     NodeStartStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -344,10 +369,12 @@ class IterationNodeStartStreamResponse(StreamResponse):
     workflow_run_id: str
     data: Data
 
+
 class IterationNodeNextStreamResponse(StreamResponse):
     """
     NodeStartStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -365,10 +392,12 @@ class IterationNodeNextStreamResponse(StreamResponse):
     workflow_run_id: str
     data: Data
 
+
 class IterationNodeCompletedStreamResponse(StreamResponse):
     """
     NodeCompletedStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -393,10 +422,12 @@ class IterationNodeCompletedStreamResponse(StreamResponse):
     workflow_run_id: str
     data: Data
 
+
 class TextChunkStreamResponse(StreamResponse):
     """
     TextChunkStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -411,6 +442,7 @@ class TextReplaceStreamResponse(StreamResponse):
     """
     TextReplaceStreamResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -473,6 +505,7 @@ class ChatbotAppBlockingResponse(AppBlockingResponse):
     """
     ChatbotAppBlockingResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -492,6 +525,7 @@ class CompletionAppBlockingResponse(AppBlockingResponse):
     """
     CompletionAppBlockingResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -510,6 +544,7 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
     """
     WorkflowAppBlockingResponse entity
     """
+
     class Data(BaseModel):
         """
         Data entity

@@ -528,10 +563,12 @@ class WorkflowAppBlockingResponse(AppBlockingResponse):
     workflow_run_id: str
     data: Data
 
+
 class WorkflowIterationState(BaseModel):
     """
     WorkflowIterationState entity
     """
+
     class Data(BaseModel):
         """
         Data entity
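On the wire these two entities become new SSE events alongside message and message_end. A sketch of constructing and inspecting them; the task id and base64 payload are made up, and model_dump assumes the pydantic v2 API used elsewhere in this diff (ConfigDict):

from core.app.entities.task_entities import (
    MessageAudioEndStreamResponse,
    MessageAudioStreamResponse,
)

chunk = MessageAudioStreamResponse(task_id='task-123', audio='SGVsbG8=')  # base64 audio bytes
end = MessageAudioEndStreamResponse(task_id='task-123', audio='')

print(chunk.event.value)            # 'tts_message'
print(end.event.value)              # 'tts_message_end'
print(chunk.model_dump()['audio'])  # 'SGVsbG8='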
@@ -4,6 +4,8 @@ import time
 from collections.abc import Generator
 from typing import Optional, Union, cast
 
+from constants.tts_auto_play_timeout import TTS_AUTO_PLAY_TIMEOUT, TTS_AUTO_PLAY_YIELD_CPU_TIME
+from core.app.apps.advanced_chat.app_generator_tts_publisher import AppGeneratorTTSPublisher, AudioTrunk
 from core.app.apps.base_app_queue_manager import AppQueueManager, PublishFrom
 from core.app.entities.app_invoke_entities import (
     AgentChatAppGenerateEntity,

@@ -32,6 +34,8 @@ from core.app.entities.task_entities import (
     CompletionAppStreamResponse,
     EasyUITaskState,
     ErrorStreamResponse,
+    MessageAudioEndStreamResponse,
+    MessageAudioStreamResponse,
     MessageEndStreamResponse,
     StreamResponse,
 )

@@ -87,6 +91,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleManage):
         """
         super().__init__(application_generate_entity, queue_manager, user, stream)
         self._model_config = application_generate_entity.model_conf
+        self._app_config = application_generate_entity.app_config
         self._conversation = conversation
         self._message = message
 

@@ -102,7 +107,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleManage):
         self._conversation_name_generate_thread = None
 
     def process(
             self,
     ) -> Union[
         ChatbotAppBlockingResponse,
         CompletionAppBlockingResponse,

@@ -123,7 +128,7 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleManage):
             self._application_generate_entity.query
         )
 
-        generator = self._process_stream_response(
+        generator = self._wrapper_process_stream_response(
             trace_manager=self._application_generate_entity.trace_manager
         )
         if self._stream:

@@ -202,14 +207,64 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleManage):
             stream_response=stream_response
         )
 
+    def _listenAudioMsg(self, publisher, task_id: str):
+        if publisher is None:
+            return None
+        audio_msg: AudioTrunk = publisher.checkAndGetAudio()
+        if audio_msg and audio_msg.status != "finish":
+            # audio_str = audio_msg.audio.decode('utf-8', errors='ignore')
+            return MessageAudioStreamResponse(audio=audio_msg.audio, task_id=task_id)
+        return None
+
+    def _wrapper_process_stream_response(self, trace_manager: Optional[TraceQueueManager] = None) -> \
+            Generator[StreamResponse, None, None]:
+
+        tenant_id = self._application_generate_entity.app_config.tenant_id
+        task_id = self._application_generate_entity.task_id
+        publisher = None
+        text_to_speech_dict = self._app_config.app_model_config_dict.get('text_to_speech')
+        if text_to_speech_dict and text_to_speech_dict.get('autoPlay') == 'enabled' and text_to_speech_dict.get('enabled'):
+            publisher = AppGeneratorTTSPublisher(tenant_id, text_to_speech_dict.get('voice', None))
+        for response in self._process_stream_response(publisher=publisher, trace_manager=trace_manager):
+            while True:
+                audio_response = self._listenAudioMsg(publisher, task_id)
+                if audio_response:
+                    yield audio_response
+                else:
+                    break
+            yield response
+
+        start_listener_time = time.time()
+        # timeout
+        while (time.time() - start_listener_time) < TTS_AUTO_PLAY_TIMEOUT:
+            if publisher is None:
+                break
+            audio = publisher.checkAndGetAudio()
+            if audio is None:
+                # release cpu
+                # sleep 20 ms (40ms => 1280 byte audio file, 20ms => 640 byte audio file)
+                time.sleep(TTS_AUTO_PLAY_YIELD_CPU_TIME)
+                continue
+            if audio.status == "finish":
+                break
+            else:
+                start_listener_time = time.time()
+                yield MessageAudioStreamResponse(audio=audio.audio,
+                                                 task_id=task_id)
+        yield MessageAudioEndStreamResponse(audio='', task_id=task_id)
+
     def _process_stream_response(
-            self, trace_manager: Optional[TraceQueueManager] = None
+            self,
+            publisher: AppGeneratorTTSPublisher,
+            trace_manager: Optional[TraceQueueManager] = None
     ) -> Generator[StreamResponse, None, None]:
         """
         Process stream response.
         :return:
         """
         for message in self._queue_manager.listen():
+            if publisher:
+                publisher.publish(message)
             event = message.event
 
             if isinstance(event, QueueErrorEvent):

@@ -272,12 +327,13 @@ class EasyUIBasedGenerateTaskPipeline(BasedGenerateTaskPipeline, MessageCycleManage):
                 yield self._ping_stream_response()
             else:
                 continue
-
+        if publisher:
+            publisher.publish(None)
         if self._conversation_name_generate_thread:
             self._conversation_name_generate_thread.join()
 
     def _save_message(
             self, trace_manager: Optional[TraceQueueManager] = None
     ) -> None:
         """
         Save message.
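Each pipeline ends its listen loop with publisher.publish(None): None is the end-of-stream sentinel that lets the publisher flush whatever partial sentence is still buffered. The lifecycle in miniature, with stand-in types:

import queue
import threading

msg_queue: queue.Queue = queue.Queue()


def runtime():
    buffered = []
    while (msg := msg_queue.get()) is not None:  # None terminates the worker
        buffered.append(msg)
    print('flush to TTS:', ''.join(buffered))    # final partial sentence


worker = threading.Thread(target=runtime)
worker.start()
for delta in ['Hi the', 're!']:
    msg_queue.put(delta)
msg_queue.put(None)  # sentinel published after the queue manager stops listening
worker.join()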
@@ -264,7 +264,7 @@ class ModelInstance:
             user=user
         )
 
-    def invoke_tts(self, content_text: str, tenant_id: str, voice: str, streaming: bool, user: Optional[str] = None) \
+    def invoke_tts(self, content_text: str, tenant_id: str, voice: str, user: Optional[str] = None) \
             -> str:
         """
         Invoke large language tts model

@@ -287,8 +287,7 @@ class ModelInstance:
             content_text=content_text,
             user=user,
             tenant_id=tenant_id,
-            voice=voice,
-            streaming=streaming
+            voice=voice
         )
 
     def _round_robin_invoke(self, function: Callable, *args, **kwargs):
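Call-site impact of the signature change, sketched: the streaming flag is gone because TTS now always streams, and the provider returns audio chunks incrementally. model_instance, tenant_id and handle_chunk are stand-ins:

audio_chunks = model_instance.invoke_tts(
    content_text='Hello there.',
    tenant_id=tenant_id,
    voice='alloy',
)
for chunk in audio_chunks:  # the publisher iterates the result the same way
    handle_chunk(chunk)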
@@ -1,4 +1,6 @@
 import hashlib
+import logging
+import re
 import subprocess
 import uuid
 from abc import abstractmethod

@@ -10,7 +12,7 @@ from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
 from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.model_runtime.model_providers.__base.ai_model import AIModel
 
-
+logger = logging.getLogger(__name__)
 class TTSModel(AIModel):
     """
     Model class for ttstext model.

@@ -20,7 +22,7 @@ class TTSModel(AIModel):
     # pydantic configs
     model_config = ConfigDict(protected_namespaces=())
 
-    def invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, streaming: bool,
+    def invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str,
                user: Optional[str] = None):
         """
         Invoke large language model

@@ -35,14 +37,15 @@ class TTSModel(AIModel):
         :return: translated audio file
         """
         try:
+            logger.info(f"Invoke TTS model: {model} , invoke content : {content_text}")
             self._is_ffmpeg_installed()
-            return self._invoke(model=model, credentials=credentials, user=user, streaming=streaming,
+            return self._invoke(model=model, credentials=credentials, user=user,
                                 content_text=content_text, voice=voice, tenant_id=tenant_id)
         except Exception as e:
             raise self._transform_invoke_error(e)
 
     @abstractmethod
-    def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, streaming: bool,
+    def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str,
                 user: Optional[str] = None):
         """
         Invoke large language model

@@ -123,26 +126,26 @@ class TTSModel(AIModel):
             return model_schema.model_properties[ModelPropertyKey.MAX_WORKERS]
 
     @staticmethod
-    def _split_text_into_sentences(text: str, limit: int, delimiters=None):
-        if delimiters is None:
-            delimiters = set('。!?;\n')
-
-        buf = []
-        word_count = 0
-        for char in text:
-            buf.append(char)
-            if char in delimiters:
-                if word_count >= limit:
-                    yield ''.join(buf)
-                    buf = []
-                    word_count = 0
-                else:
-                    word_count += 1
-            else:
-                word_count += 1
-
-        if buf:
-            yield ''.join(buf)
+    def _split_text_into_sentences(org_text, max_length=2000, pattern=r'[。.!?]'):
+        match = re.compile(pattern)
+        tx = match.finditer(org_text)
+        start = 0
+        result = []
+        one_sentence = ''
+        for i in tx:
+            end = i.regs[0][1]
+            tmp = org_text[start:end]
+            if len(one_sentence + tmp) > max_length:
+                result.append(one_sentence)
+                one_sentence = ''
+            one_sentence += tmp
+            start = end
+        last_sens = org_text[start:]
+        if last_sens:
+            one_sentence += last_sens
+        if one_sentence != '':
+            result.append(one_sentence)
+        return result
 
     @staticmethod
     def _is_ffmpeg_installed():
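A quick check of the rewritten splitter: delimiter-terminated sentences are greedily grouped up to max_length characters, and a trailing fragment with no delimiter is appended to the last group rather than dropped. The sample input is illustrative:

from core.model_runtime.model_providers.__base.tts_model import TTSModel

groups = TTSModel._split_text_into_sentences(
    'First. Second! Third? tail without delimiter', max_length=15
)
print(groups)
# ['First. Second!', ' Third? tail without delimiter']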
|
@@ -4,7 +4,7 @@ from functools import reduce
 from io import BytesIO
 from typing import Optional
 
-from flask import Response, stream_with_context
+from flask import Response
 from openai import AzureOpenAI
 from pydub import AudioSegment
 
@@ -14,7 +14,6 @@ from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.model_runtime.model_providers.azure_openai._common import _CommonAzureOpenAI
 from core.model_runtime.model_providers.azure_openai._constant import TTS_BASE_MODELS, AzureBaseModel
-from extensions.ext_storage import storage
 
 
 class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
@@ -23,7 +22,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
     """
 
     def _invoke(self, model: str, tenant_id: str, credentials: dict,
-                content_text: str, voice: str, streaming: bool, user: Optional[str] = None) -> any:
+                content_text: str, voice: str, user: Optional[str] = None) -> any:
         """
         _invoke text2speech model
 
@@ -32,30 +31,23 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
         :param credentials: model credentials
         :param content_text: text content to be translated
         :param voice: model timbre
-        :param streaming: output is streaming
         :param user: unique user id
         :return: text translated to audio file
         """
-        audio_type = self._get_model_audio_type(model, credentials)
         if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
             voice = self._get_model_default_voice(model, credentials)
-        if streaming:
-            return Response(stream_with_context(self._tts_invoke_streaming(model=model,
-                                                                           credentials=credentials,
-                                                                           content_text=content_text,
-                                                                           tenant_id=tenant_id,
-                                                                           voice=voice)),
-                            status=200, mimetype=f'audio/{audio_type}')
-        else:
-            return self._tts_invoke(model=model, credentials=credentials, content_text=content_text, voice=voice)
+        return self._tts_invoke_streaming(model=model,
+                                          credentials=credentials,
+                                          content_text=content_text,
+                                          voice=voice)
 
-    def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
+    def validate_credentials(self, model: str, credentials: dict) -> None:
         """
         validate credentials text2speech model
 
         :param model: model name
         :param credentials: model credentials
-        :param user: unique user id
         :return: text translated to audio file
         """
         try:
@@ -82,7 +74,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
         word_limit = self._get_model_word_limit(model, credentials)
         max_workers = self._get_model_workers_limit(model, credentials)
         try:
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
+            sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
             audio_bytes_list = []
 
             # Create a thread pool and map the function to the list of sentences
@@ -107,34 +99,37 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
 
-    # Todo: To improve the streaming function
-    def _tts_invoke_streaming(self, model: str, tenant_id: str, credentials: dict, content_text: str,
+    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
                               voice: str) -> any:
         """
         _tts_invoke_streaming text2speech model
 
         :param model: model name
-        :param tenant_id: user tenant id
         :param credentials: model credentials
         :param content_text: text content to be translated
         :param voice: model timbre
         :return: text translated to audio file
         """
-        # transform credentials to kwargs for model instance
-        credentials_kwargs = self._to_credential_kwargs(credentials)
-        if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
-            voice = self._get_model_default_voice(model, credentials)
-        word_limit = self._get_model_word_limit(model, credentials)
-        audio_type = self._get_model_audio_type(model, credentials)
-        tts_file_id = self._get_file_name(content_text)
-        file_path = f'generate_files/audio/{tenant_id}/{tts_file_id}.{audio_type}'
         try:
+            # doc: https://platform.openai.com/docs/guides/text-to-speech
+            credentials_kwargs = self._to_credential_kwargs(credentials)
             client = AzureOpenAI(**credentials_kwargs)
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
-            for sentence in sentences:
-                response = client.audio.speech.create(model=model, voice=voice, input=sentence.strip())
-                # response.stream_to_file(file_path)
-                storage.save(file_path, response.read())
+            # max font is 4096,there is 3500 limit for each request
+            max_length = 3500
+            if len(content_text) > max_length:
+                sentences = self._split_text_into_sentences(content_text, max_length=max_length)
+                executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences)))
+                futures = [executor.submit(client.audio.speech.with_streaming_response.create, model=model,
+                                           response_format="mp3",
+                                           input=sentences[i], voice=voice) for i in range(len(sentences))]
+                for index, future in enumerate(futures):
+                    yield from future.result().__enter__().iter_bytes(1024)
+
+            else:
+                response = client.audio.speech.with_streaming_response.create(model=model, voice=voice,
+                                                                              response_format="mp3",
+                                                                              input=content_text.strip())
+
+                yield from response.__enter__().iter_bytes(1024)
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
 
@@ -162,7 +157,7 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
 
 
     @staticmethod
-    def _get_ai_model_entity(base_model_name: str, model: str) -> AzureBaseModel:
+    def _get_ai_model_entity(base_model_name: str, model: str) -> AzureBaseModel | None:
         for ai_model_entity in TTS_BASE_MODELS:
             if ai_model_entity.base_model_name == base_model_name:
                 ai_model_entity_copy = copy.deepcopy(ai_model_entity)
@@ -170,5 +165,4 @@ class AzureOpenAIText2SpeechModel(_CommonAzureOpenAI, TTSModel):
                 ai_model_entity_copy.entity.label.en_US = model
                 ai_model_entity_copy.entity.label.zh_Hans = model
                 return ai_model_entity_copy
-
         return None
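Note: in the streaming path above, per-sentence requests run concurrently in a small thread pool, yet the byte stream stays in text order because the futures are consumed in submission order. A toy sketch of that pattern (the worker and inputs are simulated, not the provider's real call):

    import concurrent.futures
    import time

    def synthesize(sentence: str) -> bytes:
        # Stand-in for a per-sentence TTS request; later items may finish first.
        time.sleep(0.01)
        return f'<audio:{sentence}>'.encode()

    sentences = ['alpha', 'beta', 'gamma']
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(synthesize, s) for s in sentences]
        # Iterating futures in submission order keeps chunks in text order,
        # regardless of which request completes first.
        stream = b''.join(f.result() for f in futures)
    print(stream)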
@@ -21,7 +21,7 @@ model_properties:
   - mode: 'shimmer'
     name: 'Shimmer'
     language: [ 'zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID' ]
-  word_limit: 120
+  word_limit: 3500
   audio_type: 'mp3'
   max_workers: 5
 pricing:
@@ -21,7 +21,7 @@ model_properties:
   - mode: 'shimmer'
     name: 'Shimmer'
     language: ['zh-Hans', 'en-US', 'de-DE', 'fr-FR', 'es-ES', 'it-IT', 'th-TH', 'id-ID']
-  word_limit: 120
+  word_limit: 3500
   audio_type: 'mp3'
   max_workers: 5
 pricing:
@@ -3,7 +3,7 @@ from functools import reduce
 from io import BytesIO
 from typing import Optional
 
-from flask import Response, stream_with_context
+from flask import Response
 from openai import OpenAI
 from pydub import AudioSegment
 
@@ -11,7 +11,6 @@ from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.model_runtime.model_providers.openai._common import _CommonOpenAI
-from extensions.ext_storage import storage
 
 
 class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
@@ -20,7 +19,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
     """
 
     def _invoke(self, model: str, tenant_id: str, credentials: dict,
-                content_text: str, voice: str, streaming: bool, user: Optional[str] = None) -> any:
+                content_text: str, voice: str, user: Optional[str] = None) -> any:
         """
         _invoke text2speech model
 
@@ -29,22 +28,17 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
         :param credentials: model credentials
         :param content_text: text content to be translated
         :param voice: model timbre
-        :param streaming: output is streaming
         :param user: unique user id
         :return: text translated to audio file
         """
-        audio_type = self._get_model_audio_type(model, credentials)
         if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
             voice = self._get_model_default_voice(model, credentials)
-        if streaming:
-            return Response(stream_with_context(self._tts_invoke_streaming(model=model,
-                                                                           credentials=credentials,
-                                                                           content_text=content_text,
-                                                                           tenant_id=tenant_id,
-                                                                           voice=voice)),
-                            status=200, mimetype=f'audio/{audio_type}')
-        else:
-            return self._tts_invoke(model=model, credentials=credentials, content_text=content_text, voice=voice)
+        # if streaming:
+        return self._tts_invoke_streaming(model=model,
+                                          credentials=credentials,
+                                          content_text=content_text,
+                                          voice=voice)
 
     def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
         """
@@ -79,7 +73,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
         word_limit = self._get_model_word_limit(model, credentials)
         max_workers = self._get_model_workers_limit(model, credentials)
         try:
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
+            sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
             audio_bytes_list = []
 
             # Create a thread pool and map the function to the list of sentences
@@ -104,34 +98,40 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
 
-    # Todo: To improve the streaming function
-    def _tts_invoke_streaming(self, model: str, tenant_id: str, credentials: dict, content_text: str,
+    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
                               voice: str) -> any:
         """
         _tts_invoke_streaming text2speech model
 
         :param model: model name
-        :param tenant_id: user tenant id
         :param credentials: model credentials
         :param content_text: text content to be translated
         :param voice: model timbre
        :return: text translated to audio file
         """
-        # transform credentials to kwargs for model instance
-        credentials_kwargs = self._to_credential_kwargs(credentials)
-        if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
-            voice = self._get_model_default_voice(model, credentials)
-        word_limit = self._get_model_word_limit(model, credentials)
-        audio_type = self._get_model_audio_type(model, credentials)
-        tts_file_id = self._get_file_name(content_text)
-        file_path = f'generate_files/audio/{tenant_id}/{tts_file_id}.{audio_type}'
         try:
+            # doc: https://platform.openai.com/docs/guides/text-to-speech
+            credentials_kwargs = self._to_credential_kwargs(credentials)
             client = OpenAI(**credentials_kwargs)
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
-            for sentence in sentences:
-                response = client.audio.speech.create(model=model, voice=voice, input=sentence.strip())
-                # response.stream_to_file(file_path)
-                storage.save(file_path, response.read())
+            if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
+                voice = self._get_model_default_voice(model, credentials)
+            word_limit = self._get_model_word_limit(model, credentials)
+            if len(content_text) > word_limit:
+                sentences = self._split_text_into_sentences(content_text, max_length=word_limit)
+                executor = concurrent.futures.ThreadPoolExecutor(max_workers=min(3, len(sentences)))
+                futures = [executor.submit(client.audio.speech.with_streaming_response.create, model=model,
+                                           response_format="mp3",
+                                           input=sentences[i], voice=voice) for i in range(len(sentences))]
+                for index, future in enumerate(futures):
+                    yield from future.result().__enter__().iter_bytes(1024)
+
+            else:
+                response = client.audio.speech.with_streaming_response.create(model=model, voice=voice,
+                                                                              response_format="mp3",
+                                                                              input=content_text.strip())
+
+                yield from response.__enter__().iter_bytes(1024)
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
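Note: both providers now lean on the OpenAI SDK's with_streaming_response wrapper, which exposes the HTTP body incrementally rather than buffering the whole audio file. The diff enters the response via __enter__(); the equivalent context-manager form looks like this (a sketch — the model name and voice are illustrative, and an OPENAI_API_KEY is assumed in the environment):

    from openai import OpenAI

    client = OpenAI()

    def stream_speech(text: str, voice: str = 'alloy'):
        # Entering the streaming response starts the HTTP transfer;
        # iter_bytes(1024) then yields raw MP3 chunks as they arrive.
        with client.audio.speech.with_streaming_response.create(
            model='tts-1', voice=voice, response_format='mp3', input=text.strip(),
        ) as response:
            yield from response.iter_bytes(1024)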
@@ -129,7 +129,7 @@ model_properties:
   - mode: "sambert-waan-v1"
     name: "Waan(泰语女声)"
     language: [ "th-TH" ]
-  word_limit: 120
+  word_limit: 7000
   audio_type: 'mp3'
   max_workers: 5
 pricing:
@@ -1,17 +1,21 @@
 import concurrent.futures
+import threading
 from functools import reduce
 from io import BytesIO
+from queue import Queue
 from typing import Optional
 
 import dashscope
-from flask import Response, stream_with_context
+from dashscope import SpeechSynthesizer
+from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
+from dashscope.audio.tts import ResultCallback, SpeechSynthesisResult
+from flask import Response
 from pydub import AudioSegment
 
 from core.model_runtime.errors.invoke import InvokeBadRequestError
 from core.model_runtime.errors.validate import CredentialsValidateFailedError
 from core.model_runtime.model_providers.__base.tts_model import TTSModel
 from core.model_runtime.model_providers.tongyi._common import _CommonTongyi
-from extensions.ext_storage import storage
 
 
 class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
@@ -19,7 +23,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
     Model class for Tongyi Speech to text model.
     """
 
-    def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str, streaming: bool,
+    def _invoke(self, model: str, tenant_id: str, credentials: dict, content_text: str, voice: str,
                 user: Optional[str] = None) -> any:
         """
         _invoke text2speech model
@@ -29,22 +33,17 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
         :param credentials: model credentials
         :param voice: model timbre
         :param content_text: text content to be translated
-        :param streaming: output is streaming
         :param user: unique user id
         :return: text translated to audio file
         """
-        audio_type = self._get_model_audio_type(model, credentials)
-        if not voice or voice not in [d['value'] for d in self.get_tts_model_voices(model=model, credentials=credentials)]:
+        if not voice or voice not in [d['value'] for d in
+                                      self.get_tts_model_voices(model=model, credentials=credentials)]:
             voice = self._get_model_default_voice(model, credentials)
-        if streaming:
-            return Response(stream_with_context(self._tts_invoke_streaming(model=model,
-                                                                           credentials=credentials,
-                                                                           content_text=content_text,
-                                                                           voice=voice,
-                                                                           tenant_id=tenant_id)),
-                            status=200, mimetype=f'audio/{audio_type}')
-        else:
-            return self._tts_invoke(model=model, credentials=credentials, content_text=content_text, voice=voice)
+        return self._tts_invoke_streaming(model=model,
+                                          credentials=credentials,
+                                          content_text=content_text,
+                                          voice=voice)
 
     def validate_credentials(self, model: str, credentials: dict, user: Optional[str] = None) -> None:
         """
@@ -79,7 +78,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
         word_limit = self._get_model_word_limit(model, credentials)
         max_workers = self._get_model_workers_limit(model, credentials)
         try:
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
+            sentences = list(self._split_text_into_sentences(org_text=content_text, max_length=word_limit))
             audio_bytes_list = []
 
             # Create a thread pool and map the function to the list of sentences
@@ -105,14 +104,12 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
 
-    # Todo: To improve the streaming function
-    def _tts_invoke_streaming(self, model: str, tenant_id: str, credentials: dict, content_text: str,
+    def _tts_invoke_streaming(self, model: str, credentials: dict, content_text: str,
                               voice: str) -> any:
         """
         _tts_invoke_streaming text2speech model
 
         :param model: model name
-        :param tenant_id: user tenant id
         :param credentials: model credentials
         :param voice: model timbre
         :param content_text: text content to be translated
@@ -120,18 +117,32 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
         """
         word_limit = self._get_model_word_limit(model, credentials)
         audio_type = self._get_model_audio_type(model, credentials)
-        tts_file_id = self._get_file_name(content_text)
-        file_path = f'generate_files/audio/{tenant_id}/{tts_file_id}.{audio_type}'
         try:
-            sentences = list(self._split_text_into_sentences(text=content_text, limit=word_limit))
-            for sentence in sentences:
-                response = dashscope.audio.tts.SpeechSynthesizer.call(model=voice, sample_rate=48000,
-                                                                      api_key=credentials.get('dashscope_api_key'),
-                                                                      text=sentence.strip(),
-                                                                      format=audio_type, word_timestamp_enabled=True,
-                                                                      phoneme_timestamp_enabled=True)
-                if isinstance(response.get_audio_data(), bytes):
-                    storage.save(file_path, response.get_audio_data())
+            audio_queue: Queue = Queue()
+            callback = Callback(queue=audio_queue)
+
+            def invoke_remote(content, v, api_key, cb, at, wl):
+                if len(content) < word_limit:
+                    sentences = [content]
+                else:
+                    sentences = list(self._split_text_into_sentences(org_text=content, max_length=wl))
+                for sentence in sentences:
+                    SpeechSynthesizer.call(model=v, sample_rate=16000,
+                                           api_key=api_key,
+                                           text=sentence.strip(),
+                                           callback=cb,
+                                           format=at, word_timestamp_enabled=True,
+                                           phoneme_timestamp_enabled=True)
+
+            threading.Thread(target=invoke_remote, args=(
+                content_text, voice, credentials.get('dashscope_api_key'), callback, audio_type, word_limit)).start()
+
+            while True:
+                audio = audio_queue.get()
+                if audio is None:
+                    break
+                yield audio
+
         except Exception as ex:
             raise InvokeBadRequestError(str(ex))
 
@@ -152,3 +163,29 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
                                                               format=audio_type)
         if isinstance(response.get_audio_data(), bytes):
             return response.get_audio_data()
+
+
+class Callback(ResultCallback):
+
+    def __init__(self, queue: Queue):
+        self._queue = queue
+
+    def on_open(self):
+        pass
+
+    def on_complete(self):
+        self._queue.put(None)
+        self._queue.task_done()
+
+    def on_error(self, response: SpeechSynthesisResponse):
+        self._queue.put(None)
+        self._queue.task_done()
+
+    def on_close(self):
+        self._queue.put(None)
+        self._queue.task_done()
+
+    def on_event(self, result: SpeechSynthesisResult):
+        ad = result.get_audio_frame()
+        if ad:
+            self._queue.put(ad)
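Note: the Tongyi rewrite is a textbook producer/consumer bridge — the DashScope SDK pushes audio frames into a callback on its own thread, the HTTP layer wants a pull-based generator, and a Queue with a None sentinel connects the two (the role Callback plays above). A self-contained sketch of the pattern, with the SDK side simulated:

    import threading
    from collections.abc import Iterator
    from queue import Queue

    def frames_from_callback() -> Iterator[bytes]:
        q: Queue = Queue()

        def on_frame(frame):
            # The SDK would invoke this from its own thread; None marks completion.
            q.put(frame)

        def producer():
            for i in range(3):
                on_frame(f'frame-{i}'.encode())
            on_frame(None)  # sentinel: synthesis finished

        threading.Thread(target=producer, daemon=True).start()
        while True:
            frame = q.get()
            if frame is None:
                break
            yield frame

    for chunk in frames_from_callback():
        print(chunk)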
@@ -49,7 +49,7 @@ ignore = [
     "B006", # mutable-argument-default
     "B007", # unused-loop-control-variable
     "B026", # star-arg-unpacking-after-keyword-arg
-    "B901", # return-in-generator
+    # "B901", # return-in-generator
     "B904", # raise-without-from-inside-except
     "B905", # zip-without-explicit-strict
 ]
@@ -123,6 +123,8 @@ class AppService:
         app.icon = args['icon']
         app.icon_background = args['icon_background']
         app.tenant_id = tenant_id
+        app.api_rph = args.get('api_rph', 0)
+        app.api_rpm = args.get('api_rpm', 0)
 
         db.session.add(app)
         db.session.flush()
@@ -1,11 +1,12 @@
 import io
+import logging
 from typing import Optional
 
 from werkzeug.datastructures import FileStorage
 
 from core.model_manager import ModelManager
 from core.model_runtime.entities.model_entities import ModelType
-from models.model import App, AppMode, AppModelConfig
+from models.model import App, AppMode, AppModelConfig, Message
 from services.errors.audio import (
     AudioTooLargeServiceError,
     NoAudioUploadedServiceError,
@@ -18,6 +19,8 @@ FILE_SIZE = 30
 FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm', 'amr']
 
+logger = logging.getLogger(__name__)
+
 
 class AudioService:
     @classmethod
@@ -64,51 +67,74 @@ class AudioService:
         return {"text": model_instance.invoke_speech2text(file=buffer, user=end_user)}
 
     @classmethod
-    def transcript_tts(cls, app_model: App, text: str, streaming: bool,
-                       voice: Optional[str] = None, end_user: Optional[str] = None):
-        if app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]:
-            workflow = app_model.workflow
-            if workflow is None:
-                raise ValueError("TTS is not enabled")
-
-            features_dict = workflow.features_dict
-            if 'text_to_speech' not in features_dict or not features_dict['text_to_speech'].get('enabled'):
-                raise ValueError("TTS is not enabled")
-
-            voice = features_dict['text_to_speech'].get('voice') if voice is None else voice
-        else:
-            text_to_speech_dict = app_model.app_model_config.text_to_speech_dict
-
-            if not text_to_speech_dict.get('enabled'):
-                raise ValueError("TTS is not enabled")
-
-            voice = text_to_speech_dict.get('voice') if voice is None else voice
-
-        model_manager = ModelManager()
-        model_instance = model_manager.get_default_model_instance(
-            tenant_id=app_model.tenant_id,
-            model_type=ModelType.TTS
-        )
-        if model_instance is None:
-            raise ProviderNotSupportTextToSpeechServiceError()
-
-        try:
-            if not voice:
-                voices = model_instance.get_tts_voices()
-                if voices:
-                    voice = voices[0].get('value')
-                else:
-                    raise ValueError("Sorry, no voice available.")
-
-            return model_instance.invoke_tts(
-                content_text=text.strip(),
-                user=end_user,
-                streaming=streaming,
-                tenant_id=app_model.tenant_id,
-                voice=voice
-            )
-        except Exception as e:
-            raise e
+    def transcript_tts(cls, app_model: App, text: Optional[str] = None,
+                       voice: Optional[str] = None, end_user: Optional[str] = None, message_id: Optional[str] = None):
+        from collections.abc import Generator
+
+        from flask import Response, stream_with_context
+
+        from app import app
+        from extensions.ext_database import db
+
+        def invoke_tts(text_content: str, app_model, voice: Optional[str] = None):
+            with app.app_context():
+                if app_model.mode in [AppMode.ADVANCED_CHAT.value, AppMode.WORKFLOW.value]:
+                    workflow = app_model.workflow
+                    if workflow is None:
+                        raise ValueError("TTS is not enabled")
+
+                    features_dict = workflow.features_dict
+                    if 'text_to_speech' not in features_dict or not features_dict['text_to_speech'].get('enabled'):
+                        raise ValueError("TTS is not enabled")
+
+                    voice = features_dict['text_to_speech'].get('voice') if voice is None else voice
+                else:
+                    text_to_speech_dict = app_model.app_model_config.text_to_speech_dict
+
+                    if not text_to_speech_dict.get('enabled'):
+                        raise ValueError("TTS is not enabled")
+
+                    voice = text_to_speech_dict.get('voice') if voice is None else voice
+
+                model_manager = ModelManager()
+                model_instance = model_manager.get_default_model_instance(
+                    tenant_id=app_model.tenant_id,
+                    model_type=ModelType.TTS
+                )
+                try:
+                    if not voice:
+                        voices = model_instance.get_tts_voices()
+                        if voices:
+                            voice = voices[0].get('value')
+                        else:
+                            raise ValueError("Sorry, no voice available.")
+
+                    return model_instance.invoke_tts(
+                        content_text=text_content.strip(),
+                        user=end_user,
+                        tenant_id=app_model.tenant_id,
+                        voice=voice
+                    )
+                except Exception as e:
+                    raise e
+
+        if message_id:
+            message = db.session.query(Message).filter(
+                Message.id == message_id
+            ).first()
+            if message.answer == '' and message.status == 'normal':
+                return None
+
+            else:
+                response = invoke_tts(message.answer, app_model=app_model, voice=voice)
+                if isinstance(response, Generator):
+                    return Response(stream_with_context(response), content_type='audio/mpeg')
+                return response
+        else:
+            response = invoke_tts(text, app_model, voice)
+            if isinstance(response, Generator):
+                return Response(stream_with_context(response), content_type='audio/mpeg')
+            return response
 
     @classmethod
     def transcript_tts_voices(cls, tenant_id: str, language: str):
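Note: transcript_tts now accepts either raw text or a message_id (resolving the stored answer itself) and decides at runtime how to deliver the result — generator values are wrapped in a streamed Flask response, anything else is passed through. A minimal sketch of that dispatch (the route and stand-in generator are illustrative, not the project's actual wiring):

    from collections.abc import Generator
    from flask import Flask, Response, stream_with_context

    app = Flask(__name__)

    def invoke_tts(text: str):
        # Stand-in: a streaming provider yields MP3 chunks; a non-streaming
        # one could return bytes directly instead.
        yield from (text.encode(), b'...')

    @app.route('/demo-tts')
    def demo_tts():
        response = invoke_tts('hello')
        # Same dispatch as transcript_tts: stream generators, pass bytes through.
        if isinstance(response, Generator):
            return Response(stream_with_context(response), content_type='audio/mpeg')
        return response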
@@ -11,11 +11,13 @@ import { usePathname } from 'next/navigation'
 import { useTranslation } from 'react-i18next'
 import { Listbox, Transition } from '@headlessui/react'
 import { CheckIcon, ChevronDownIcon } from '@heroicons/react/20/solid'
+import RadioGroup from '@/app/components/app/configuration/config-vision/radio-group'
 import type { Item } from '@/app/components/base/select'
 import ConfigContext from '@/context/debug-configuration'
 import { fetchAppVoices } from '@/service/apps'
 import Tooltip from '@/app/components/base/tooltip'
 import { languages } from '@/i18n/language'
+import { TtsAutoPlay } from '@/types/app'
 const VoiceParamConfig: FC = () => {
   const { t } = useTranslation()
   const pathname = usePathname()
@@ -27,12 +29,16 @@ const VoiceParamConfig: FC = () => {
     setTextToSpeechConfig,
   } = useContext(ConfigContext)
 
-  const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
+  let languageItem = languages.find(item => item.value === textToSpeechConfig.language)
   const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
+  if (languages && !languageItem)
+    languageItem = languages[0]
   const language = languageItem?.value
   const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
-  const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
+  let voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
+  if (voiceItems && !voiceItem)
+    voiceItem = voiceItems[0]
+
   const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
 
   return (
@@ -42,8 +48,9 @@ const VoiceParamConfig: FC = () => {
       <div className='pt-3 space-y-6'>
         <div>
           <div className='mb-2 flex items-center space-x-1'>
-            <div className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
-            <Tooltip htmlContent={<div className='w-[180px]' >
+            <div
+              className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
+            <Tooltip htmlContent={<div className='w-[180px]'>
              {t('appDebug.voice.voiceSettings.resolutionTooltip').split('\n').map(item => (
                <div key={item}>{item}</div>
              ))}
@@ -61,7 +68,8 @@ const VoiceParamConfig: FC = () => {
            }}
          >
            <div className={'relative h-9'}>
-              <Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
+              <Listbox.Button
+                className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
                <span className={classNames('block truncate text-left', !languageItem?.name && 'text-gray-400')}>
                  {languageItem?.name ? t(`common.voice.language.${languageItem?.value.replace('-', '')}`) : localLanguagePlaceholder}
                </span>
@@ -79,7 +87,8 @@ const VoiceParamConfig: FC = () => {
                leaveTo="opacity-0"
              >
 
-                <Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
+                <Listbox.Options
+                  className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
                  {languages.map((item: Item) => (
                    <Listbox.Option
                      key={item.value}
@@ -100,7 +109,7 @@ const VoiceParamConfig: FC = () => {
                            'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
                          )}
                        >
-                          <CheckIcon className="h-5 w-5" aria-hidden="true" />
+                          <CheckIcon className="h-5 w-5" aria-hidden="true"/>
                        </span>
                      )}
                    </>
@@ -112,9 +121,9 @@ const VoiceParamConfig: FC = () => {
          </div>
        </Listbox>
      </div>
 
      <div>
-        <div className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
+        <div
+          className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
        <Listbox
          value={voiceItem}
          disabled={!languageItem}
@@ -126,8 +135,10 @@ const VoiceParamConfig: FC = () => {
          }}
        >
          <div className={'relative h-9'}>
-            <Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
-              <span className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
+            <Listbox.Button
+              className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
+              <span
+                className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
              <span className="pointer-events-none absolute inset-y-0 right-0 flex items-center pr-2">
                <ChevronDownIcon
                  className="h-5 w-5 text-gray-400"
@@ -142,7 +153,8 @@ const VoiceParamConfig: FC = () => {
                leaveTo="opacity-0"
              >
 
-                <Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
+                <Listbox.Options
+                  className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
                  {voiceItems?.map((item: Item) => (
                    <Listbox.Option
                      key={item.value}
@@ -162,7 +174,7 @@ const VoiceParamConfig: FC = () => {
                            'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
                          )}
                        >
-                          <CheckIcon className="h-5 w-5" aria-hidden="true" />
+                          <CheckIcon className="h-5 w-5" aria-hidden="true"/>
                        </span>
                      )}
                    </>
@@ -174,6 +186,30 @@ const VoiceParamConfig: FC = () => {
          </div>
        </Listbox>
      </div>
+      <div>
+        <div
+          className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.autoPlay')}</div>
+        <RadioGroup
+          className='space-x-3'
+          options={[
+            {
+              label: t('appDebug.voice.voiceSettings.autoPlayEnabled'),
+              value: TtsAutoPlay.enabled,
+            },
+            {
+              label: t('appDebug.voice.voiceSettings.autoPlayDisabled'),
+              value: TtsAutoPlay.disabled,
+            },
+          ]}
+          value={textToSpeechConfig.autoPlay ? textToSpeechConfig.autoPlay : TtsAutoPlay.disabled}
+          onChange={(value: TtsAutoPlay) => {
+            setTextToSpeechConfig({
+              ...textToSpeechConfig,
+              autoPlay: value,
+            })
+          }}
+        />
+      </div>
    </div>
  </div>
 </div>
@@ -40,7 +40,6 @@ const TextToSpeech: FC = () => {
       { languageInfo?.example && (
         <AudioBtn
           value={languageInfo?.example}
-          voice={voiceItem?.value}
           isAudition
           noCache
         />
@@ -428,8 +428,7 @@ const GenerationItem: FC<IGenerationItemProps> = ({
             <>
               <div className='ml-2 mr-2 h-[14px] w-[1px] bg-gray-200'></div>
               <AudioBtn
-                value={content}
-                noCache={false}
+                id={messageId!}
                 className={'mr-1'}
               />
             </>
53
web/app/components/base/audio-btn/audio.player.manager.ts
Normal file
@@ -0,0 +1,53 @@
+import AudioPlayer from '@/app/components/base/audio-btn/audio'
+declare global {
+  // eslint-disable-next-line @typescript-eslint/consistent-type-definitions
+  interface AudioPlayerManager {
+    instance: AudioPlayerManager
+  }
+
+}
+
+export class AudioPlayerManager {
+  private static instance: AudioPlayerManager
+  private audioPlayers: AudioPlayer | null = null
+  private msgId: string | undefined
+
+  private constructor() {
+  }
+
+  public static getInstance(): AudioPlayerManager {
+    if (!AudioPlayerManager.instance) {
+      AudioPlayerManager.instance = new AudioPlayerManager()
+      this.instance = AudioPlayerManager.instance
+    }
+
+    return AudioPlayerManager.instance
+  }
+
+  public getAudioPlayer(url: string, isPublic: boolean, id: string | undefined, msgContent: string | null | undefined, voice: string | undefined, callback: ((event: string) => {}) | null): AudioPlayer {
+    if (this.msgId && this.msgId === id && this.audioPlayers) {
+      this.audioPlayers.setCallback(callback)
+      return this.audioPlayers
+    }
+    else {
+      if (this.audioPlayers) {
+        try {
+          this.audioPlayers.pauseAudio()
+          this.audioPlayers.cacheBuffers = []
+          this.audioPlayers.sourceBuffer?.abort()
+        }
+        catch (e) {
+        }
+      }
+
+      this.msgId = id
+      this.audioPlayers = new AudioPlayer(url, isPublic, id, msgContent, callback)
+      return this.audioPlayers
+    }
+  }
+
+  public resetMsgId(msgId: string) {
+    this.msgId = msgId
+    this.audioPlayers?.resetMsgId(msgId)
+  }
+}
263
web/app/components/base/audio-btn/audio.ts
Normal file
@@ -0,0 +1,263 @@
+import Toast from '@/app/components/base/toast'
+import { textToAudioStream } from '@/service/share'
+
+declare global {
+  // eslint-disable-next-line @typescript-eslint/consistent-type-definitions
+  interface Window {
+    ManagedMediaSource: any
+  }
+}
+
+export default class AudioPlayer {
+  mediaSource: MediaSource | null
+  audio: HTMLAudioElement
+  audioContext: AudioContext
+  sourceBuffer?: SourceBuffer
+  cacheBuffers: ArrayBuffer[] = []
+  pauseTimer: number | null = null
+  msgId: string | undefined
+  msgContent: string | null | undefined = null
+  voice: string | undefined = undefined
+  isLoadData = false
+  url: string
+  isPublic: boolean
+  callback: ((event: string) => {}) | null
+
+  constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | null | undefined, callback: ((event: string) => {}) | null) {
+    this.audioContext = new AudioContext()
+    this.msgId = msgId
+    this.msgContent = msgContent
+    this.url = streamUrl
+    this.isPublic = isPublic
+    this.callback = callback
+
+    // Compatible with iphone ios17 ManagedMediaSource
+    const MediaSource = window.MediaSource || window.ManagedMediaSource
+    if (!MediaSource) {
+      Toast.notify({
+        message: 'Your browser does not support audio streaming, if you are using an iPhone, please update to iOS 17.1 or later.',
+        type: 'error',
+      })
+    }
+    this.mediaSource = MediaSource ? new MediaSource() : null
+    this.audio = new Audio()
+    this.setCallback(callback)
+    this.audio.src = this.mediaSource ? URL.createObjectURL(this.mediaSource) : ''
+    this.audio.autoplay = true
+
+    const source = this.audioContext.createMediaElementSource(this.audio)
+    source.connect(this.audioContext.destination)
+    this.listenMediaSource('audio/mpeg')
+  }
+
+  public resetMsgId(msgId: string) {
+    this.msgId = msgId
+  }
+
+  private listenMediaSource(contentType: string) {
+    this.mediaSource?.addEventListener('sourceopen', () => {
+      if (this.sourceBuffer)
+        return
+
+      this.sourceBuffer = this.mediaSource?.addSourceBuffer(contentType)
+      // this.sourceBuffer?.addEventListener('update', () => {
+      //   if (this.cacheBuffers.length && !this.sourceBuffer?.updating) {
+      //     const cacheBuffer = this.cacheBuffers.shift()!
+      //     this.sourceBuffer?.appendBuffer(cacheBuffer)
+      //   }
+      //   // this.pauseAudio()
+      // })
+      //
+      // this.sourceBuffer?.addEventListener('updateend', () => {
+      //   if (this.cacheBuffers.length && !this.sourceBuffer?.updating) {
+      //     const cacheBuffer = this.cacheBuffers.shift()!
+      //     this.sourceBuffer?.appendBuffer(cacheBuffer)
+      //   }
+      //   // this.pauseAudio()
+      // })
+    })
+  }
+
+  public setCallback(callback: ((event: string) => {}) | null) {
+    this.callback = callback
+    if (callback) {
+      this.audio.addEventListener('ended', () => {
+        callback('ended')
+      }, false)
+      this.audio.addEventListener('paused', () => {
+        callback('paused')
+      }, true)
+      this.audio.addEventListener('loaded', () => {
+        callback('loaded')
+      }, true)
+      this.audio.addEventListener('play', () => {
+        callback('play')
+      }, true)
+      this.audio.addEventListener('timeupdate', () => {
+        callback('timeupdate')
+      }, true)
+      this.audio.addEventListener('loadeddate', () => {
+        callback('loadeddate')
+      }, true)
+      this.audio.addEventListener('canplay', () => {
+        callback('canplay')
+      }, true)
+      this.audio.addEventListener('error', () => {
+        callback('error')
+      }, true)
+    }
+  }
+
+  private async loadAudio() {
+    try {
+      const audioResponse: any = await textToAudioStream(this.url, this.isPublic, { content_type: 'audio/mpeg' }, {
+        message_id: this.msgId,
+        streaming: true,
+        voice: this.voice,
+        text: this.msgContent,
+      })
+
+      if (audioResponse.status !== 200) {
+        this.isLoadData = false
+        if (this.callback)
+          this.callback('error')
+      }
+
+      const reader = audioResponse.body.getReader()
+      while (true) {
+        const { value, done } = await reader.read()
+
+        if (done) {
+          this.receiveAudioData(value)
+          break
+        }
+
+        this.receiveAudioData(value)
+      }
+    }
+    catch (error) {
+      this.isLoadData = false
+      this.callback && this.callback('error')
+    }
+  }
+
+  // play audio
+  public playAudio() {
+    if (this.isLoadData) {
+      if (this.audioContext.state === 'suspended') {
+        this.audioContext.resume().then((_) => {
+          this.audio.play()
+          this.callback && this.callback('play')
+        })
+      }
+      else if (this.audio.ended) {
+        this.audio.play()
+        this.callback && this.callback('play')
+      }
+      if (this.callback)
+        this.callback('play')
+    }
+    else {
+      this.isLoadData = true
+      this.loadAudio()
+    }
+  }
+
+  private theEndOfStream() {
+    const endTimer = setInterval(() => {
+      if (!this.sourceBuffer?.updating) {
+        this.mediaSource?.endOfStream()
+        clearInterval(endTimer)
+      }
+      console.log('finishStream endOfStream endTimer')
+    }, 10)
+  }
+
+  private finishStream() {
+    const timer = setInterval(() => {
+      if (!this.cacheBuffers.length) {
+        this.theEndOfStream()
+        clearInterval(timer)
+      }
+
+      if (this.cacheBuffers.length && !this.sourceBuffer?.updating) {
+        const arrayBuffer = this.cacheBuffers.shift()!
+        this.sourceBuffer?.appendBuffer(arrayBuffer)
+      }
+      console.log('finishStream timer')
+    }, 10)
+  }
+
+  public async playAudioWithAudio(audio: string, play = true) {
+    if (!audio || !audio.length) {
+      this.finishStream()
+      return
+    }
+
+    const audioContent = Buffer.from(audio, 'base64')
+    this.receiveAudioData(new Uint8Array(audioContent))
+    if (play) {
+      this.isLoadData = true
+      if (this.audio.paused) {
+        this.audioContext.resume().then((_) => {
+          this.audio.play()
+          this.callback && this.callback('play')
+        })
+      }
+      else if (this.audio.ended) {
+        this.audio.play()
+        this.callback && this.callback('play')
+      }
+      else if (this.audio.played) { /* empty */ }
+
+      else {
+        this.audio.play()
+        this.callback && this.callback('play')
+      }
+    }
+  }
+
+  public pauseAudio() {
+    this.callback && this.callback('paused')
+    this.audio.pause()
+    this.audioContext.suspend()
+  }
+
+  private cancer() {
+
+  }
+
+  private receiveAudioData(unit8Array: Uint8Array) {
+    if (!unit8Array) {
+      this.finishStream()
+      return
+    }
+    const audioData = this.byteArrayToArrayBuffer(unit8Array)
+    if (!audioData.byteLength) {
+      if (this.mediaSource?.readyState === 'open')
+        this.finishStream()
+      return
+    }
+
+    if (this.sourceBuffer?.updating) {
+      this.cacheBuffers.push(audioData)
+    }
+    else {
+      if (this.cacheBuffers.length && !this.sourceBuffer?.updating) {
+        this.cacheBuffers.push(audioData)
+        const cacheBuffer = this.cacheBuffers.shift()!
+        this.sourceBuffer?.appendBuffer(cacheBuffer)
+      }
+      else {
+        this.sourceBuffer?.appendBuffer(audioData)
+      }
+    }
+  }
+
+  private byteArrayToArrayBuffer(byteArray: Uint8Array): ArrayBuffer {
+    const arrayBuffer = new ArrayBuffer(byteArray.length)
+    const uint8Array = new Uint8Array(arrayBuffer)
+    uint8Array.set(byteArray)
+    return arrayBuffer
+  }
+}
@@ -1,124 +1,78 @@
 'use client'
-import { useEffect, useRef, useState } from 'react'
+import { useRef, useState } from 'react'
 import { t } from 'i18next'
 import { useParams, usePathname } from 'next/navigation'
 import s from './style.module.css'
 import Tooltip from '@/app/components/base/tooltip'
 import { randomString } from '@/utils'
-import { textToAudio } from '@/service/share'
 import Loading from '@/app/components/base/loading'
+import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'

 type AudioBtnProps = {
-  value: string
+  id?: string
   voice?: string
+  value?: string
   className?: string
   isAudition?: boolean
-  noCache: boolean
+  noCache?: boolean
 }

 type AudioState = 'initial' | 'loading' | 'playing' | 'paused' | 'ended'

 const AudioBtn = ({
-  value,
+  id,
   voice,
+  value,
   className,
   isAudition,
-  noCache,
 }: AudioBtnProps) => {
-  const audioRef = useRef<HTMLAudioElement | null>(null)
   const [audioState, setAudioState] = useState<AudioState>('initial')

   const selector = useRef(`play-tooltip-${randomString(4)}`)
   const params = useParams()
   const pathname = usePathname()
-  const removeCodeBlocks = (inputText: any) => {
-    const codeBlockRegex = /```[\s\S]*?```/g
-    if (inputText)
-      return inputText.replace(codeBlockRegex, '')
-    return ''
-  }
-
-  const loadAudio = async () => {
-    const formData = new FormData()
-    formData.append('text', removeCodeBlocks(value))
-    formData.append('voice', removeCodeBlocks(voice))
-
-    if (value !== '') {
-      setAudioState('loading')
-
-      let url = ''
-      let isPublic = false
-
-      if (params.token) {
-        url = '/text-to-audio'
-        isPublic = true
-      }
-      else if (params.appId) {
-        if (pathname.search('explore/installed') > -1)
-          url = `/installed-apps/${params.appId}/text-to-audio`
-        else
-          url = `/apps/${params.appId}/text-to-audio`
-      }
-
-      try {
-        const audioResponse = await textToAudio(url, isPublic, formData)
-        const blob_bytes = Buffer.from(audioResponse.data, 'latin1')
-        const blob = new Blob([blob_bytes], { type: 'audio/wav' })
-        const audioUrl = URL.createObjectURL(blob)
-        audioRef.current!.src = audioUrl
-      }
-      catch (error) {
-        setAudioState('initial')
-        console.error('Error playing audio:', error)
-      }
-    }
-  }
-
-  const handleToggle = async () => {
-    if (audioState === 'initial' || noCache) {
-      await loadAudio()
-    }
-    else if (audioRef.current) {
-      if (audioState === 'playing') {
-        audioRef.current.pause()
-        setAudioState('paused')
-      }
-      else {
-        audioRef.current.play()
-        setAudioState('playing')
-      }
-    }
-  }
-
-  useEffect(() => {
-    const currentAudio = audioRef.current
-
-    const handleLoading = () => {
-      setAudioState('loading')
-    }
-
-    const handlePlay = () => {
-      currentAudio?.play()
-      setAudioState('playing')
-    }
-
-    const handleEnded = () => {
-      setAudioState('ended')
-    }
-
-    currentAudio?.addEventListener('progress', handleLoading)
-    currentAudio?.addEventListener('canplaythrough', handlePlay)
-    currentAudio?.addEventListener('ended', handleEnded)
-
-    return () => {
-      currentAudio?.removeEventListener('progress', handleLoading)
-      currentAudio?.removeEventListener('canplaythrough', handlePlay)
-      currentAudio?.removeEventListener('ended', handleEnded)
-      URL.revokeObjectURL(currentAudio?.src || '')
-      currentAudio?.pause()
-      currentAudio?.setAttribute('src', '')
-    }
-  }, [])
+  const audio_finished_call = (event: string): any => {
+    switch (event) {
+      case 'ended':
+        setAudioState('ended')
+        break
+      case 'paused':
+        setAudioState('ended')
+        break
+      case 'loaded':
+        setAudioState('loading')
+        break
+      case 'play':
+        setAudioState('playing')
+        break
+      case 'error':
+        setAudioState('ended')
+        break
+    }
+  }
+  let url = ''
+  let isPublic = false
+  if (params.token) {
+    url = '/text-to-audio'
+    isPublic = true
+  }
+  else if (params.appId) {
+    if (pathname.search('explore/installed') > -1)
+      url = `/installed-apps/${params.appId}/text-to-audio`
+    else
+      url = `/apps/${params.appId}/text-to-audio`
+  }
+  const handleToggle = async () => {
+    if (audioState === 'playing' || audioState === 'loading') {
+      setAudioState('paused')
+      AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).pauseAudio()
+    }
+    else {
+      setAudioState('loading')
+      AudioPlayerManager.getInstance().getAudioPlayer(url, isPublic, id, value, voice, audio_finished_call).playAudio()
+    }
+  }

   const tooltipContent = {
     initial: t('appApi.play'),
@@ -151,7 +105,6 @@ const AudioBtn = ({
         )}
       </button>
     </Tooltip>
-    <audio ref={audioRef} src='' className='hidden' />
   </div>
 )
 }
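All playback now goes through AudioPlayerManager, which lives in audio.player.manager.ts and is not included in this diff. The sketch below is a guess at the shape the call sites above assume: a singleton that reuses one streaming player per message id. Everything here is an assumption about the implementation, not the actual file; only the names getInstance, getAudioPlayer, resetMsgId, playAudio, pauseAudio and playAudioWithAudio are taken from call sites in this diff.

// The real AudioPlayer lives in audio.ts (partially shown above); declared
// here only so the sketch type-checks. The constructor signature is assumed.
declare class AudioPlayer {
  constructor(streamUrl: string, isPublic: boolean, msgId: string | undefined, msgContent: string | undefined, voice: string | undefined, callback: (event: string) => any)
  playAudio(): void
  pauseAudio(): void
  playAudioWithAudio(audio: string, play: boolean): void
}

// Hypothetical manager sketch.
class AudioPlayerManager {
  private static instance: AudioPlayerManager
  private msgId?: string
  private player?: AudioPlayer

  static getInstance(): AudioPlayerManager {
    if (!AudioPlayerManager.instance)
      AudioPlayerManager.instance = new AudioPlayerManager()
    return AudioPlayerManager.instance
  }

  getAudioPlayer(url: string, isPublic: boolean, id: string | undefined, text: string | undefined, voice: string | undefined, callback: (event: string) => any): AudioPlayer {
    // Reuse the current player while the message id is unchanged, so all
    // chunks of one answer feed a single MediaSource pipeline.
    if (this.player && this.msgId === id)
      return this.player
    this.msgId = id
    this.player = new AudioPlayer(url, isPublic, id, text, voice, callback)
    return this.player
  }

  resetMsgId(msgId: string) {
    this.msgId = msgId
  }
}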
@@ -8,6 +8,7 @@ import type {
   ChatConfig,
   ChatItem,
 } from '../../types'
+import { useChatContext } from '../context'
 import Operation from './operation'
 import AgentContent from './agent-content'
 import BasicContent from './basic-content'
@@ -59,23 +60,25 @@ const Answer: FC<AnswerProps> = ({
   } = item
   const hasAgentThoughts = !!agent_thoughts?.length

-  const [containerWidth, setContainerWidth] = useState(0)
+  const [containerWidth] = useState(0)
   const [contentWidth, setContentWidth] = useState(0)
   const containerRef = useRef<HTMLDivElement>(null)
   const contentRef = useRef<HTMLDivElement>(null)

-  const getContainerWidth = () => {
-    if (containerRef.current)
-      setContainerWidth(containerRef.current?.clientWidth + 16)
-  }
+  const {
+    config: chatContextConfig,
+  } = useChatContext()

+  const voiceRef = useRef(chatContextConfig?.text_to_speech?.voice)
   const getContentWidth = () => {
     if (contentRef.current)
       setContentWidth(contentRef.current?.clientWidth)
   }

   useEffect(() => {
-    getContainerWidth()
-  }, [])
+    voiceRef.current = chatContextConfig?.text_to_speech?.voice
+  }
+  , [chatContextConfig?.text_to_speech?.voice])

   useEffect(() => {
     if (!responding)
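The voiceRef introduced above is the usual latest-value ref idiom: long-lived callbacks read voiceRef.current instead of closing over a possibly stale chatContextConfig. A generic sketch of the same pattern (useLatest is a hypothetical helper name, not part of this codebase):

import { useEffect, useRef } from 'react'

// Keeps a ref in sync with the latest value so handlers created once
// can still read fresh props/state without re-subscribing.
function useLatest<T>(value: T) {
  const ref = useRef(value)
  useEffect(() => {
    ref.current = value
  }, [value])
  return ref
}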
@@ -119,9 +119,9 @@ const Operation: FC<OperationProps> = ({
         <>
           <div className='mx-1 w-[1px] h-[14px] bg-gray-200'/>
           <AudioBtn
+            id={id}
             value={content}
             noCache={false}
-            voice={config?.text_to_speech?.voice}
             className='hidden group-hover:block'
           />
         </>
@@ -6,6 +6,8 @@ import {
 } from 'react'
 import { useTranslation } from 'react-i18next'
 import { produce, setAutoFreeze } from 'immer'
+import { useParams, usePathname } from 'next/navigation'
+import { v4 as uuidV4 } from 'uuid'
 import type {
   ChatConfig,
   ChatItem,
@@ -20,6 +22,7 @@ import { replaceStringWithValues } from '@/app/components/app/configuration/prom
 import type { Annotation } from '@/models/log'
 import { WorkflowRunningStatus } from '@/app/components/workflow/types'
 import useTimestamp from '@/hooks/use-timestamp'
+import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'

 type GetAbortController = (abortController: AbortController) => void
 type SendCallback = {
@@ -91,7 +94,8 @@ export const useChat = (
   const conversationMessagesAbortControllerRef = useRef<AbortController | null>(null)
   const suggestedQuestionsAbortControllerRef = useRef<AbortController | null>(null)
   const checkPromptVariables = useCheckPromptVariables()
+  const params = useParams()
+  const pathname = usePathname()
   useEffect(() => {
     setAutoFreeze(false)
     return () => {
@@ -262,6 +266,19 @@ export const useChat = (
     let isAgentMode = false
     let hasSetResponseId = false

+    let ttsUrl = ''
+    let ttsIsPublic = false
+    if (params.token) {
+      ttsUrl = '/text-to-audio'
+      ttsIsPublic = true
+    }
+    else if (params.appId) {
+      if (pathname.search('explore/installed') > -1)
+        ttsUrl = `/installed-apps/${params.appId}/text-to-audio`
+      else
+        ttsUrl = `/apps/${params.appId}/text-to-audio`
+    }
+    const player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', (_: any): any => {})
     ssePost(
       url,
       {
@@ -530,6 +547,15 @@ export const useChat = (
           }
         }))
       },
+      onTTSChunk: (messageId: string, audio: string) => {
+        if (!audio || audio === '')
+          return
+        player.playAudioWithAudio(audio, true)
+        AudioPlayerManager.getInstance().resetMsgId(messageId)
+      },
+      onTTSEnd: (messageId: string, audio: string) => {
+        player.playAudioWithAudio(audio, false)
+      },
     })
     return true
   }, [
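onTTSChunk hands player.playAudioWithAudio(audio, true) a string payload; per the tts_message SSE branch added in service/base.ts later in this diff, that string is base64-encoded audio. A sketch of the decode step the player presumably performs before appending bytes to its SourceBuffer; this is an assumption, since the player internals are not part of this diff:

// Sketch: decode a base64 SSE `audio` payload into bytes suitable for
// an MSE SourceBuffer. Uses the browser's atob().
function base64AudioToBytes(audio: string): Uint8Array {
  const binary = atob(audio)
  const bytes = new Uint8Array(binary.length)
  for (let i = 0; i < binary.length; i++)
    bytes[i] = binary.charCodeAt(i)
  return bytes
}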
@@ -19,6 +19,8 @@ import type { Item } from '@/app/components/base/select'
 import { fetchAppVoices } from '@/service/apps'
 import Tooltip from '@/app/components/base/tooltip'
 import { languages } from '@/i18n/language'
+import RadioGroup from '@/app/components/app/configuration/config-vision/radio-group'
+import { TtsAutoPlay } from '@/types/app'

 type VoiceParamConfigProps = {
   onChange?: OnFeaturesChange
@@ -33,12 +35,16 @@ const VoiceParamConfig = ({
   const text2speech = useFeatures(state => state.features.text2speech)
   const featuresStore = useFeaturesStore()

-  const languageItem = languages.find(item => item.value === text2speech.language)
+  let languageItem = languages.find(item => item.value === text2speech?.language)
+  if (languages && !languageItem)
+    languageItem = languages[0]
   const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')

   const language = languageItem?.value
   const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
-  const voiceItem = voiceItems?.find(item => item.value === text2speech.voice)
+  let voiceItem = voiceItems?.find(item => item.value === text2speech?.voice)
+  if (voiceItems && !voiceItem)
+    voiceItem = voiceItems[0]
   const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')

   const handleChange = (value: Record<string, string>) => {
@@ -66,13 +72,14 @@
     <div className='pt-3 space-y-6'>
       <div>
         <div className='mb-2 flex items-center space-x-1'>
-          <div className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
-          <Tooltip htmlContent={<div className='w-[180px]' >
+          <div
+            className='leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.language')}</div>
+          <Tooltip htmlContent={<div className='w-[180px]'>
             {t('appDebug.voice.voiceSettings.resolutionTooltip').split('\n').map(item => (
               <div key={item}>{item}</div>
             ))}
           </div>} selector='config-resolution-tooltip'>
-            <RiQuestionLine className='w-[14px] h-[14px] text-gray-400' />
+            <RiQuestionLine className='w-[14px] h-[14px] text-gray-400'/>
           </Tooltip>
         </div>
         <Listbox
@@ -84,7 +91,8 @@
           }}
         >
           <div className={'relative h-9'}>
-            <Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
+            <Listbox.Button
+              className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
               <span className={classNames('block truncate text-left', !languageItem?.name && 'text-gray-400')}>
                 {languageItem?.name ? t(`common.voice.language.${languageItem?.value.replace('-', '')}`) : localLanguagePlaceholder}
               </span>
@@ -102,7 +110,8 @@
             leaveTo="opacity-0"
           >

-            <Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
+            <Listbox.Options
+              className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
               {languages.map((item: Item) => (
                 <Listbox.Option
                   key={item.value}
@@ -117,13 +126,13 @@
                   <>
                     <span
                       className={classNames('block', selected && 'font-normal')}>{t(`common.voice.language.${(item.value).toString().replace('-', '')}`)}</span>
-                    {(selected || item.value === text2speech.language) && (
+                    {(selected || item.value === text2speech?.language) && (
                       <span
                         className={classNames(
                           'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
                         )}
                       >
-                        <CheckIcon className="h-5 w-5" aria-hidden="true" />
+                        <CheckIcon className="h-5 w-5" aria-hidden="true"/>
                       </span>
                     )}
                   </>
@@ -137,7 +146,8 @@
       </div>

       <div>
-        <div className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
+        <div
+          className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.voice')}</div>
         <Listbox
           value={voiceItem}
           disabled={!languageItem}
@@ -148,8 +158,10 @@
           }}
         >
           <div className={'relative h-9'}>
-            <Listbox.Button className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
-              <span className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
+            <Listbox.Button
+              className={'w-full h-full rounded-lg border-0 bg-gray-100 py-1.5 pl-3 pr-10 sm:text-sm sm:leading-6 focus-visible:outline-none focus-visible:bg-gray-200 group-hover:bg-gray-200 cursor-pointer'}>
+              <span
+                className={classNames('block truncate text-left', !voiceItem?.name && 'text-gray-400')}>{voiceItem?.name ?? localVoicePlaceholder}</span>
               <span className="pointer-events-none absolute inset-y-0 right-0 flex items-center pr-2">
                 <ChevronDownIcon
                   className="h-5 w-5 text-gray-400"
@@ -164,7 +176,8 @@
             leaveTo="opacity-0"
           >

-            <Listbox.Options className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
+            <Listbox.Options
+              className="absolute z-10 mt-1 px-1 max-h-60 w-full overflow-auto rounded-md bg-white py-1 text-base shadow-lg border-gray-200 border-[0.5px] focus:outline-none sm:text-sm">
               {voiceItems?.map((item: Item) => (
                 <Listbox.Option
                   key={item.value}
@@ -178,13 +191,13 @@
                   {({ /* active, */ selected }) => (
                     <>
                       <span className={classNames('block', selected && 'font-normal')}>{item.name}</span>
-                      {(selected || item.value === text2speech.voice) && (
+                      {(selected || item.value === text2speech?.voice) && (
                         <span
                           className={classNames(
                             'absolute inset-y-0 right-0 flex items-center pr-4 text-gray-700',
                           )}
                         >
-                          <CheckIcon className="h-5 w-5" aria-hidden="true" />
+                          <CheckIcon className="h-5 w-5" aria-hidden="true"/>
                         </span>
                       )}
                     </>
@@ -196,6 +209,29 @@
           </div>
         </Listbox>
       </div>
+      <div>
+        <div
+          className='mb-2 leading-[18px] text-[13px] font-semibold text-gray-800'>{t('appDebug.voice.voiceSettings.autoPlay')}</div>
+        <RadioGroup
+          className='space-x-3'
+          options={[
+            {
+              label: t('appDebug.voice.voiceSettings.autoPlayEnabled'),
+              value: TtsAutoPlay.enabled,
+            },
+            {
+              label: t('appDebug.voice.voiceSettings.autoPlayDisabled'),
+              value: TtsAutoPlay.disabled,
+            },
+          ]}
+          value={text2speech?.autoPlay ? text2speech?.autoPlay : TtsAutoPlay.disabled}
+          onChange={(value: TtsAutoPlay) => {
+            handleChange({
+              autoPlay: value,
+            })
+          }}
+        />
+      </div>
     </div>
   </div>
 </div>
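After a save, the text-to-speech feature object now carries autoPlay alongside the existing keys (the TextToSpeech type gains the field in the next hunk). An illustrative value; the voice name is a placeholder, not taken from this diff:

import type { TextToSpeech } from '@/app/components/base/features/types'
import { TtsAutoPlay } from '@/types/app'

const textToSpeech: TextToSpeech = {
  enabled: true,
  language: 'en-US',
  voice: 'sample-voice', // placeholder; real values come from fetchAppVoices
  autoPlay: TtsAutoPlay.enabled,
}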
@@ -1,4 +1,4 @@
-import type { TransferMethod } from '@/types/app'
+import type { TransferMethod, TtsAutoPlay } from '@/types/app'

 export type EnabledOrDisabled = {
   enabled?: boolean
@@ -14,6 +14,7 @@ export type SuggestedQuestionsAfterAnswer = EnabledOrDisabled
 export type TextToSpeech = EnabledOrDisabled & {
   language?: string
   voice?: string
+  autoPlay?: TtsAutoPlay
 }

 export type SpeechToText = EnabledOrDisabled
@@ -4,6 +4,8 @@ import {
   useStoreApi,
 } from 'reactflow'
 import produce from 'immer'
+import { v4 as uuidV4 } from 'uuid'
+import { usePathname } from 'next/navigation'
 import { useWorkflowStore } from '../store'
 import { useNodesSyncDraft } from '../hooks'
 import {
@@ -19,6 +21,7 @@ import {
   stopWorkflowRun,
 } from '@/service/workflow'
 import { useFeaturesStore } from '@/app/components/base/features/hooks'
+import { AudioPlayerManager } from '@/app/components/base/audio-btn/audio.player.manager'

 export const useWorkflowRun = () => {
   const store = useStoreApi()
@@ -27,6 +30,7 @@ export const useWorkflowRun = () => {
   const featuresStore = useFeaturesStore()
   const { doSyncWorkflowDraft } = useNodesSyncDraft()
   const { handleUpdateWorkflowCanvas } = useWorkflowUpdate()
+  const pathname = usePathname()

   const handleBackupDraft = useCallback(() => {
     const {
@@ -134,6 +138,20 @@
     let isInIteration = false
     let iterationLength = 0

+    let ttsUrl = ''
+    let ttsIsPublic = false
+    if (params.token) {
+      ttsUrl = '/text-to-audio'
+      ttsIsPublic = true
+    }
+    else if (params.appId) {
+      if (pathname.search('explore/installed') > -1)
+        ttsUrl = `/installed-apps/${params.appId}/text-to-audio`
+      else
+        ttsUrl = `/apps/${params.appId}/text-to-audio`
+    }
+    const player = AudioPlayerManager.getInstance().getAudioPlayer(ttsUrl, ttsIsPublic, uuidV4(), 'none', 'none', (_: any): any => {})
+
     ssePost(
       url,
       {
@@ -468,6 +486,15 @@
           draft.resultText = text
         }))
       },
+      onTTSChunk: (messageId: string, audio: string, audioType?: string) => {
+        if (!audio || audio === '')
+          return
+        player.playAudioWithAudio(audio, true)
+        AudioPlayerManager.getInstance().resetMsgId(messageId)
+      },
+      onTTSEnd: (messageId: string, audio: string, audioType?: string) => {
+        player.playAudioWithAudio(audio, false)
+      },
       ...restCallback,
     },
   )
@@ -323,6 +323,9 @@ const translation = {
       language: 'Language',
       resolutionTooltip: 'Text-to-speech voice support language。',
       voice: 'Voice',
+      autoPlay: 'Auto Play',
+      autoPlayEnabled: 'Turn On',
+      autoPlayDisabled: 'Turn Off',
     },
   },
   openingStatement: {
@@ -319,6 +319,9 @@ const translation = {
      language: '言語',
      resolutionTooltip: 'テキスト読み上げの音声言語をサポートします。',
      voice: '音声',
+      autoPlay: '自動再生',
+      autoPlayEnabled: 'オン',
+      autoPlayDisabled: 'オフ',
     },
   },
   openingStatement: {
@@ -319,6 +319,9 @@ const translation = {
       language: '语言',
       resolutionTooltip: '文本转语音音色支持语言。',
       voice: '音色',
+      autoPlay: '自动播放',
+      autoPlayEnabled: '开启',
+      autoPlayDisabled: '关闭',
     },
   },
   openingStatement: {
@@ -318,6 +318,9 @@ const translation = {
       language: '語言',
       resolutionTooltip: '文字轉語音音色支援語言。',
       voice: '音色',
+      autoPlay: '自動播放',
+      autoPlayEnabled: '開啟',
+      autoPlayDisabled: '關閉',
     },
   },
   openingStatement: {
@@ -1,4 +1,4 @@
-import type { AgentStrategy, ModelModeType, RETRIEVE_TYPE, ToolItem } from '@/types/app'
+import type { AgentStrategy, ModelModeType, RETRIEVE_TYPE, ToolItem, TtsAutoPlay } from '@/types/app'
 export type Inputs = Record<string, string | number | object>

 export enum PromptMode {
@@ -79,6 +79,7 @@ export type TextToSpeechConfig = {
   enabled: boolean
   voice?: string
   language?: string
+  autoPlay?: TtsAutoPlay
 }

 export type CitationConfig = MoreLikeThisConfig
@@ -34,6 +34,7 @@ const nextConfig = {
     // https://nextjs.org/docs/api-reference/next.config.js/ignoring-typescript-errors
     ignoreBuildErrors: true,
   },
+  reactStrictMode: true,
   async redirects() {
     return [
       {
@@ -120,6 +120,7 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u
 }

 export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
+  language = language || 'en-US'
   return get<AppVoicesListResponse>(`apps/${appId}/text-to-audio/voices?language=${language}`)
 }
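With the default in place, callers may omit language entirely. A usage sketch; the app id is a placeholder, and the items carry name/value pairs as consumed by the voice Listbox earlier in this diff:

import { fetchAppVoices } from '@/service/apps'

// language falls back to 'en-US' inside fetchAppVoices
const voices = await fetchAppVoices({ appId: 'your-app-id' }) // app id is a placeholder
// each entry is an Item with `name` (display label) and `value` (voice id)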
@@ -19,6 +19,7 @@ const TIME_OUT = 100000
 const ContentType = {
   json: 'application/json',
   stream: 'text/event-stream',
+  audio: 'audio/mpeg',
   form: 'application/x-www-form-urlencoded; charset=UTF-8',
   download: 'application/octet-stream', // for download
   upload: 'multipart/form-data', // for upload
@@ -59,6 +60,8 @@ export type IOnIterationStarted = (workflowStarted: IterationStartedResponse) =>
 export type IOnIterationNexted = (workflowStarted: IterationNextedResponse) => void
 export type IOnIterationFinished = (workflowFinished: IterationFinishedResponse) => void
 export type IOnTextChunk = (textChunk: TextChunkResponse) => void
+export type IOnTTSChunk = (messageId: string, audioStr: string, audioType?: string) => void
+export type IOnTTSEnd = (messageId: string, audioStr: string, audioType?: string) => void
 export type IOnTextReplace = (textReplace: TextReplaceResponse) => void

 export type IOtherOptions = {
@@ -84,6 +87,8 @@
   onIterationNext?: IOnIterationNexted
   onIterationFinish?: IOnIterationFinished
   onTextChunk?: IOnTextChunk
+  onTTSChunk?: IOnTTSChunk
+  onTTSEnd?: IOnTTSEnd
   onTextReplace?: IOnTextReplace
 }

@@ -135,6 +140,8 @@ const handleStream = (
   onIterationNext?: IOnIterationNexted,
   onIterationFinish?: IOnIterationFinished,
   onTextChunk?: IOnTextChunk,
+  onTTSChunk?: IOnTTSChunk,
+  onTTSEnd?: IOnTTSEnd,
   onTextReplace?: IOnTextReplace,
 ) => {
   if (!response.ok)
@@ -227,6 +234,12 @@
           else if (bufferObj.event === 'text_replace') {
             onTextReplace?.(bufferObj as TextReplaceResponse)
           }
+          else if (bufferObj.event === 'tts_message') {
+            onTTSChunk?.(bufferObj.message_id, bufferObj.audio, bufferObj.audio_type)
+          }
+          else if (bufferObj.event === 'tts_message_end') {
+            onTTSEnd?.(bufferObj.message_id, bufferObj.audio)
+          }
         }
       })
       buffer = lines[lines.length - 1]
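The two new branches assume SSE events shaped like the object below; audio is the base64 payload forwarded untouched to the IOnTTSChunk/IOnTTSEnd callbacks. Field values here are illustrative, not taken from a real stream:

// Illustrative `tts_message` event as parsed into bufferObj above.
const exampleTtsEvent = {
  event: 'tts_message',          // or 'tts_message_end' for the final chunk
  message_id: 'msg-123',         // illustrative id
  audio: '<base64 audio bytes>', // decoded by the audio player
  audio_type: 'audio/mpeg',      // assumption, matching ContentType.audio above
}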
@@ -390,9 +403,10 @@ const baseFetch = <T>(
       }

       // return data
-      const data: Promise<T> = options.headers.get('Content-type') === ContentType.download ? res.blob() : res.json()
-
-      resolve(needAllResponseContent ? resClone : data)
+      if (options.headers.get('Content-type') === ContentType.download || options.headers.get('Content-type') === ContentType.audio)
+        resolve(needAllResponseContent ? resClone : res.blob())
+      else resolve(needAllResponseContent ? resClone : res.json())
     })
     .catch((err) => {
       if (!silent)
@@ -475,6 +489,8 @@ export const ssePost = (
     onIterationNext,
     onIterationFinish,
     onTextChunk,
+    onTTSChunk,
+    onTTSEnd,
     onTextReplace,
     onError,
     getAbortController,
@@ -527,7 +543,7 @@ export const ssePost = (
           return
         }
         onData?.(str, isFirstMessage, moreInfo)
-      }, onCompleted, onThought, onMessageEnd, onMessageReplace, onFile, onWorkflowStarted, onWorkflowFinished, onNodeStarted, onNodeFinished, onIterationStart, onIterationNext, onIterationFinish, onTextChunk, onTextReplace)
+      }, onCompleted, onThought, onMessageEnd, onMessageReplace, onFile, onWorkflowStarted, onWorkflowFinished, onNodeStarted, onNodeFinished, onIterationStart, onIterationNext, onIterationFinish, onTextChunk, onTTSChunk, onTTSEnd, onTextReplace)
     }).catch((e) => {
       if (e.toString() !== 'AbortError: The user aborted a request.')
         Toast.notify({ type: 'error', message: e })
@@ -1,4 +1,4 @@
-import type { IOnCompleted, IOnData, IOnError, IOnFile, IOnIterationFinished, IOnIterationNexted, IOnIterationStarted, IOnMessageEnd, IOnMessageReplace, IOnNodeFinished, IOnNodeStarted, IOnTextChunk, IOnTextReplace, IOnThought, IOnWorkflowFinished, IOnWorkflowStarted } from './base'
+import type { IOnCompleted, IOnData, IOnError, IOnFile, IOnIterationFinished, IOnIterationNexted, IOnIterationStarted, IOnMessageEnd, IOnMessageReplace, IOnNodeFinished, IOnNodeStarted, IOnTTSChunk, IOnTTSEnd, IOnTextChunk, IOnTextReplace, IOnThought, IOnWorkflowFinished, IOnWorkflowStarted } from './base'
 import {
   del as consoleDel, get as consoleGet, patch as consolePatch, post as consolePost,
   delPublic as del, getPublic as get, patchPublic as patch, postPublic as post, ssePost,
@@ -30,7 +30,7 @@ export function getUrl(url: string, isInstalledApp: boolean, installedAppId: str
   return isInstalledApp ? `installed-apps/${installedAppId}/${url.startsWith('/') ? url.slice(1) : url}` : url
 }

-export const sendChatMessage = async (body: Record<string, any>, { onData, onCompleted, onThought, onFile, onError, getAbortController, onMessageEnd, onMessageReplace }: {
+export const sendChatMessage = async (body: Record<string, any>, { onData, onCompleted, onThought, onFile, onError, getAbortController, onMessageEnd, onMessageReplace, onTTSChunk, onTTSEnd }: {
   onData: IOnData
   onCompleted: IOnCompleted
   onFile: IOnFile
@@ -39,13 +39,15 @@ export const sendChatMessage = async (body: Record<string, any>, { onData, onCom
   onMessageEnd?: IOnMessageEnd
   onMessageReplace?: IOnMessageReplace
   getAbortController?: (abortController: AbortController) => void
+  onTTSChunk?: IOnTTSChunk
+  onTTSEnd?: IOnTTSEnd
 }, isInstalledApp: boolean, installedAppId = '') => {
   return ssePost(getUrl('chat-messages', isInstalledApp, installedAppId), {
     body: {
       ...body,
       response_mode: 'streaming',
     },
-  }, { onData, onCompleted, onThought, onFile, isPublicAPI: !isInstalledApp, onError, getAbortController, onMessageEnd, onMessageReplace })
+  }, { onData, onCompleted, onThought, onFile, isPublicAPI: !isInstalledApp, onError, getAbortController, onMessageEnd, onMessageReplace, onTTSChunk, onTTSEnd })
 }

 export const stopChatMessageResponding = async (appId: string, taskId: string, isInstalledApp: boolean, installedAppId = '') => {
@@ -214,6 +216,10 @@ export const textToAudio = (url: string, isPublicAPI: boolean, body: FormData) =
   return (getAction('post', !isPublicAPI))(url, { body }, { bodyStringify: false, deleteContentType: true }) as Promise<{ data: string }>
 }

+export const textToAudioStream = (url: string, isPublicAPI: boolean, header: { content_type: string }, body: { streaming: boolean; voice?: string; message_id?: string; text?: string | null | undefined }) => {
+  return (getAction('post', !isPublicAPI))(url, { body, header }, { needAllResponseContent: true })
+}
+
 export const fetchAccessToken = async (appCode: string) => {
   const headers = new Headers()
   headers.append('X-App-Code', appCode)
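Because textToAudioStream passes needAllResponseContent: true, it resolves to the raw cloned Response (see the baseFetch change earlier), leaving the caller to consume the body itself. A usage sketch; the endpoint, cast, and message id are illustrative:

import { textToAudioStream } from '@/service/share'

const response = await textToAudioStream(
  '/text-to-audio',               // illustrative endpoint
  true,                           // public (web app) API
  { content_type: 'audio/mpeg' },
  { streaming: true, message_id: 'msg-123' }, // illustrative message id
) as Response

const reader = response.body!.getReader()
for (;;) {
  const { value, done } = await reader.read()
  if (done)
    break
  // `value` is a Uint8Array chunk of encoded audio
}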
@@ -160,6 +160,7 @@ export type ModelConfig = {
     enabled: boolean
     voice?: string
     language?: string
+    autoPlay?: TtsAutoPlay
   }
   retriever_resource: {
     enabled: boolean
@@ -349,6 +350,11 @@ export enum TransferMethod {
   remote_url = 'remote_url',
 }

+export enum TtsAutoPlay {
+  enabled = 'enabled',
+  disabled = 'disabled',
+}
+
 export const ALLOW_FILE_EXTENSIONS = ['png', 'jpg', 'jpeg', 'webp', 'gif']

 export type VisionSettings = {
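Downstream code can gate auto playback on the enum rather than a bare boolean. A sketch, with config standing in for a ModelConfig value as typed above (the field path mirrors config?.text_to_speech?.voice used in the operation component earlier in this diff):

import type { ModelConfig } from '@/types/app'
import { TtsAutoPlay } from '@/types/app'

declare const config: ModelConfig // stand-in value for the sketch

const shouldAutoPlay
  = config.text_to_speech?.enabled
  && config.text_to_speech?.autoPlay === TtsAutoPlay.enabled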