feat: correctly delete applications using Celery workers (#5787)

This commit is contained in:
Charles Zhou 2024-07-01 01:21:17 -05:00 committed by GitHub
parent 5692f9b33b
commit cb09dbef66
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 158 additions and 60 deletions

View File

@ -3,7 +3,7 @@
cd web && npm install
echo 'alias start-api="cd /workspaces/dify/api && flask run --host 0.0.0.0 --port=5001 --debug"' >> ~/.bashrc
echo 'alias start-worker="cd /workspaces/dify/api && celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace"' >> ~/.bashrc
echo 'alias start-worker="cd /workspaces/dify/api && celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion"' >> ~/.bashrc
echo 'alias start-web="cd /workspaces/dify/web && npm run dev"' >> ~/.bashrc
echo 'alias start-containers="cd /workspaces/dify/docker && docker-compose -f docker-compose.middleware.yaml -p dify up -d"' >> ~/.bashrc

2
.vscode/launch.json vendored
View File

@ -48,7 +48,7 @@
"--loglevel",
"info",
"-Q",
"dataset,generation,mail,ops_trace"
"dataset,generation,mail,ops_trace,app_deletion"
]
},
]

View File

@ -66,7 +66,7 @@
10. If you need to debug local async processing, please start the worker service.
```bash
poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace
poetry run python -m celery -A app.celery worker -P gevent -c 1 --loglevel INFO -Q dataset,generation,mail,ops_trace,app_deletion
```
The started celery app handles the async tasks, e.g. dataset importing and documents indexing.

View File

@ -9,7 +9,7 @@ fi
if [[ "${MODE}" == "worker" ]]; then
celery -A app.celery worker -P ${CELERY_WORKER_CLASS:-gevent} -c ${CELERY_WORKER_AMOUNT:-1} --loglevel INFO \
-Q ${CELERY_QUEUES:-dataset,generation,mail,ops_trace}
-Q ${CELERY_QUEUES:-dataset,generation,mail,ops_trace,app_deletion}
elif [[ "${MODE}" == "beat" ]]; then
celery -A app.celery beat --loglevel INFO
else

View File

@ -3,9 +3,6 @@ from blinker import signal
# sender: app
app_was_created = signal('app-was-created')
# sender: app
app_was_deleted = signal('app-was-deleted')
# sender: app, kwargs: app_model_config
app_model_config_was_updated = signal('app-model-config-was-updated')

View File

@ -4,10 +4,7 @@ from .create_document_index import handle
from .create_installed_app_when_app_created import handle
from .create_site_record_when_app_created import handle
from .deduct_quota_when_messaeg_created import handle
from .delete_installed_app_when_app_deleted import handle
from .delete_site_record_when_app_deleted import handle
from .delete_tool_parameters_cache_when_sync_draft_workflow import handle
from .delete_workflow_as_tool_when_app_deleted import handle
from .update_app_dataset_join_when_app_model_config_updated import handle
from .update_app_dataset_join_when_app_published_workflow_updated import handle
from .update_provider_last_used_at_when_messaeg_created import handle

View File

@ -1,12 +0,0 @@
from events.app_event import app_was_deleted
from extensions.ext_database import db
from models.model import InstalledApp
@app_was_deleted.connect
def handle(sender, **kwargs):
app = sender
installed_apps = db.session.query(InstalledApp).filter(InstalledApp.app_id == app.id).all()
for installed_app in installed_apps:
db.session.delete(installed_app)
db.session.commit()

View File

@ -1,11 +0,0 @@
from events.app_event import app_was_deleted
from extensions.ext_database import db
from models.model import Site
@app_was_deleted.connect
def handle(sender, **kwargs):
app = sender
site = db.session.query(Site).filter(Site.app_id == app.id).first()
db.session.delete(site)
db.session.commit()

View File

@ -1,14 +0,0 @@
from events.app_event import app_was_deleted
from extensions.ext_database import db
from models.tools import WorkflowToolProvider
@app_was_deleted.connect
def handle(sender, **kwargs):
app = sender
workflow_tools = db.session.query(WorkflowToolProvider).filter(
WorkflowToolProvider.app_id == app.id
).all()
for workflow_tool in workflow_tools:
db.session.delete(workflow_tool)
db.session.commit()

View File

@ -16,13 +16,14 @@ from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelTy
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.tools.tool_manager import ToolManager
from core.tools.utils.configuration import ToolParameterConfigurationManager
from events.app_event import app_model_config_was_updated, app_was_created, app_was_deleted
from events.app_event import app_model_config_was_updated, app_was_created
from extensions.ext_database import db
from models.account import Account
from models.model import App, AppMode, AppModelConfig
from models.tools import ApiToolProvider
from services.tag_service import TagService
from services.workflow_service import WorkflowService
from tasks.remove_app_and_related_data_task import remove_app_and_related_data_task
class AppService:
@ -393,18 +394,8 @@ class AppService:
Delete app
:param app: App instance
"""
db.session.delete(app)
db.session.commit()
app_was_deleted.send(app)
# todo async delete related data by event
# app_model_configs, site, api_tokens, installed_apps, recommended_apps BY app
# app_annotation_hit_histories, app_annotation_settings, app_dataset_joins BY app
# workflows, workflow_runs, workflow_node_executions, workflow_app_logs BY app
# conversations, pinned_conversations, messages BY app
# message_feedbacks, message_annotations, message_chains BY message
# message_agent_thoughts, message_files, saved_messages BY message
# Trigger asynchronous deletion of app and related data
remove_app_and_related_data_task.delay(app.id)
def get_app_meta(self, app_model: App) -> dict:
"""

View File

@ -0,0 +1,150 @@
import logging
import time
import click
from celery import shared_task
from sqlalchemy import select
from sqlalchemy.exc import SQLAlchemyError
from extensions.ext_database import db
from models.dataset import AppDatasetJoin
from models.model import (
ApiToken,
App,
AppAnnotationHitHistory,
AppAnnotationSetting,
AppModelConfig,
Conversation,
EndUser,
InstalledApp,
Message,
MessageAgentThought,
MessageAnnotation,
MessageChain,
MessageFeedback,
MessageFile,
RecommendedApp,
Site,
TagBinding,
)
from models.tools import WorkflowToolProvider
from models.web import PinnedConversation, SavedMessage
from models.workflow import Workflow, WorkflowAppLog, WorkflowNodeExecution, WorkflowRun
@shared_task(queue='app_deletion', bind=True, max_retries=3)
def remove_app_and_related_data_task(self, app_id: str):
logging.info(click.style(f'Start deleting app and related data: {app_id}', fg='green'))
start_at = time.perf_counter()
deletion_cache_key = f'app_{app_id}_deletion'
try:
# Use a transaction to ensure all deletions succeed or none do
with db.session.begin_nested():
app = db.session.query(App).filter(App.id == app_id).first()
if not app:
logging.warning(click.style(f"App {app_id} not found", fg='yellow'))
return
# Delete related data
_delete_app_model_configs(app_id)
_delete_app_site(app_id)
_delete_app_api_tokens(app_id)
_delete_installed_apps(app_id)
_delete_recommended_apps(app_id)
_delete_app_annotation_data(app_id)
_delete_app_dataset_joins(app_id)
_delete_app_workflows(app_id)
_delete_app_conversations(app_id)
_delete_app_messages(app_id)
_delete_workflow_tool_providers(app_id)
_delete_app_tag_bindings(app_id)
_delete_end_users(app_id)
# Delete the app itself
db.session.delete(app)
# If we reach here, the transaction was successful
db.session.commit()
end_at = time.perf_counter()
logging.info(click.style(f'App and related data deleted: {app_id} latency: {end_at - start_at}', fg='green'))
except SQLAlchemyError as e:
db.session.rollback()
logging.exception(click.style(f"Database error occurred while deleting app {app_id} and related data", fg='red'))
raise self.retry(exc=e, countdown=60) # Retry after 60 seconds
except Exception as e:
logging.exception(click.style(f"Error occurred while deleting app {app_id} and related data", fg='red'))
raise self.retry(exc=e, countdown=60) # Retry after 60 seconds
def _delete_app_model_configs(app_id: str):
db.session.query(AppModelConfig).filter(AppModelConfig.app_id == app_id).delete()
def _delete_app_site(app_id: str):
db.session.query(Site).filter(Site.app_id == app_id).delete()
def _delete_app_api_tokens(app_id: str):
db.session.query(ApiToken).filter(ApiToken.app_id == app_id).delete()
def _delete_installed_apps(app_id: str):
db.session.query(InstalledApp).filter(InstalledApp.app_id == app_id).delete()
def _delete_recommended_apps(app_id: str):
db.session.query(RecommendedApp).filter(RecommendedApp.app_id == app_id).delete()
def _delete_app_annotation_data(app_id: str):
db.session.query(AppAnnotationHitHistory).filter(AppAnnotationHitHistory.app_id == app_id).delete()
db.session.query(AppAnnotationSetting).filter(AppAnnotationSetting.app_id == app_id).delete()
def _delete_app_dataset_joins(app_id: str):
db.session.query(AppDatasetJoin).filter(AppDatasetJoin.app_id == app_id).delete()
def _delete_app_workflows(app_id: str):
db.session.query(WorkflowRun).filter(
WorkflowRun.workflow_id.in_(
db.session.query(Workflow.id).filter(Workflow.app_id == app_id)
)
).delete(synchronize_session=False)
db.session.query(WorkflowNodeExecution).filter(
WorkflowNodeExecution.workflow_id.in_(
db.session.query(Workflow.id).filter(Workflow.app_id == app_id)
)
).delete(synchronize_session=False)
db.session.query(WorkflowAppLog).filter(WorkflowAppLog.app_id == app_id).delete(synchronize_session=False)
db.session.query(Workflow).filter(Workflow.app_id == app_id).delete(synchronize_session=False)
def _delete_app_conversations(app_id: str):
db.session.query(PinnedConversation).filter(
PinnedConversation.conversation_id.in_(
db.session.query(Conversation.id).filter(Conversation.app_id == app_id)
)
).delete(synchronize_session=False)
db.session.query(Conversation).filter(Conversation.app_id == app_id).delete()
def _delete_app_messages(app_id: str):
message_ids = select(Message.id).filter(Message.app_id == app_id).scalar_subquery()
db.session.query(MessageFeedback).filter(MessageFeedback.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(MessageAnnotation).filter(MessageAnnotation.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(MessageChain).filter(MessageChain.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(MessageAgentThought).filter(MessageAgentThought.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(MessageFile).filter(MessageFile.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(SavedMessage).filter(SavedMessage.message_id.in_(message_ids)).delete(synchronize_session=False)
db.session.query(Message).filter(Message.app_id == app_id).delete(synchronize_session=False)
def _delete_workflow_tool_providers(app_id: str):
db.session.query(WorkflowToolProvider).filter(
WorkflowToolProvider.app_id == app_id
).delete(synchronize_session=False)
def _delete_app_tag_bindings(app_id: str):
db.session.query(TagBinding).filter(
TagBinding.target_id == app_id
).delete(synchronize_session=False)
def _delete_end_users(app_id: str):
db.session.query(EndUser).filter(EndUser.app_id == app_id).delete()