Initial commit

John Wang 2023-05-15 08:51:32 +08:00
commit db896255d6
744 changed files with 56028 additions and 0 deletions

61
.github/workflows/build-api-image.sh vendored Normal file

@@ -0,0 +1,61 @@
#!/usr/bin/env bash
set -eo pipefail
SHA=$(git rev-parse HEAD)
REPO_NAME=langgenius/dify
API_REPO_NAME="${REPO_NAME}-api"
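# Derive the image tags from the triggering GitHub event:
# - pull_request: tag with the sanitized head ref and pr-<num>, cache from "latest"
# - release: tag with the release tag, cache from "latest"
# - branch push: tag with the sanitized branch name, cache from that same tag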
if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
REFSPEC=$(echo "${GITHUB_HEAD_REF}" | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
PR_NUM=$(echo "${GITHUB_REF}" | sed 's:refs/pull/::' | sed 's:/merge::')
LATEST_TAG="pr-${PR_NUM}"
CACHE_FROM_TAG="latest"
elif [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then
REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/tags/::' | head -c 40)
LATEST_TAG="${REFSPEC}"
CACHE_FROM_TAG="latest"
else
REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/heads/::' | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
LATEST_TAG="${REFSPEC}"
CACHE_FROM_TAG="${REFSPEC}"
fi
if [[ "${REFSPEC}" == "main" ]]; then
LATEST_TAG="latest"
CACHE_FROM_TAG="latest"
fi
echo "Pulling cache image ${API_REPO_NAME}:${CACHE_FROM_TAG}"
if docker pull "${API_REPO_NAME}:${CACHE_FROM_TAG}"; then
API_CACHE_FROM_SCRIPT="--cache-from ${API_REPO_NAME}:${CACHE_FROM_TAG}"
else
echo "WARNING: Failed to pull ${API_REPO_NAME}:${CACHE_FROM_TAG}, disable build image cache."
API_CACHE_FROM_SCRIPT=""
fi
cat<<EOF
Rolling with tags:
- ${API_REPO_NAME}:${SHA}
- ${API_REPO_NAME}:${REFSPEC}
- ${API_REPO_NAME}:${LATEST_TAG}
EOF
#
# Build image
#
cd api
docker build \
${API_CACHE_FROM_SCRIPT} \
--build-arg COMMIT_SHA=${SHA} \
-t "${API_REPO_NAME}:${SHA}" \
-t "${API_REPO_NAME}:${REFSPEC}" \
-t "${API_REPO_NAME}:${LATEST_TAG}" \
--label "sha=${SHA}" \
--label "built_at=$(date)" \
--label "build_actor=${GITHUB_ACTOR}" \
.
# push
docker push --all-tags "${API_REPO_NAME}"

43
.github/workflows/build-api-image.yml vendored Normal file

@@ -0,0 +1,43 @@
name: Build and Push API Image
on:
push:
branches:
- 'main'
- 'deploy/dev'
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
jobs:
build-and-push:
runs-on: ubuntu-latest
if: github.event.pull_request.draft == false
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v2
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push Docker image
shell: bash
env:
DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
/bin/bash .github/workflows/build-api-image.sh
- name: Deploy to server
if: github.ref == 'refs/heads/deploy/dev'
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
${{ secrets.SSH_SCRIPT }}

60
.github/workflows/build-web-image.sh vendored Normal file

@@ -0,0 +1,60 @@
#!/usr/bin/env bash
set -eo pipefail
SHA=$(git rev-parse HEAD)
REPO_NAME=langgenius/dify
WEB_REPO_NAME="${REPO_NAME}-web"
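# Tag and cache-from selection below mirrors build-api-image.sh.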
if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
REFSPEC=$(echo "${GITHUB_HEAD_REF}" | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
PR_NUM=$(echo "${GITHUB_REF}" | sed 's:refs/pull/::' | sed 's:/merge::')
LATEST_TAG="pr-${PR_NUM}"
CACHE_FROM_TAG="latest"
elif [[ "${GITHUB_EVENT_NAME}" == "release" ]]; then
REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/tags/::' | head -c 40)
LATEST_TAG="${REFSPEC}"
CACHE_FROM_TAG="latest"
else
REFSPEC=$(echo "${GITHUB_REF}" | sed 's:refs/heads/::' | sed 's/[^a-zA-Z0-9]/-/g' | head -c 40)
LATEST_TAG="${REFSPEC}"
CACHE_FROM_TAG="${REFSPEC}"
fi
if [[ "${REFSPEC}" == "main" ]]; then
LATEST_TAG="latest"
CACHE_FROM_TAG="latest"
fi
echo "Pulling cache image ${WEB_REPO_NAME}:${CACHE_FROM_TAG}"
if docker pull "${WEB_REPO_NAME}:${CACHE_FROM_TAG}"; then
WEB_CACHE_FROM_SCRIPT="--cache-from ${WEB_REPO_NAME}:${CACHE_FROM_TAG}"
else
echo "WARNING: Failed to pull ${WEB_REPO_NAME}:${CACHE_FROM_TAG}, disable build image cache."
WEB_CACHE_FROM_SCRIPT=""
fi
cat<<EOF
Rolling with tags:
- ${WEB_REPO_NAME}:${SHA}
- ${WEB_REPO_NAME}:${REFSPEC}
- ${WEB_REPO_NAME}:${LATEST_TAG}
EOF
#
# Build image
#
cd web
docker build \
${WEB_CACHE_FROM_SCRIPT} \
--build-arg COMMIT_SHA=${SHA} \
-t "${WEB_REPO_NAME}:${SHA}" \
-t "${WEB_REPO_NAME}:${REFSPEC}" \
-t "${WEB_REPO_NAME}:${LATEST_TAG}" \
--label "sha=${SHA}" \
--label "built_at=$(date)" \
--label "build_actor=${GITHUB_ACTOR}" \
.
docker push --all-tags "${WEB_REPO_NAME}"

43
.github/workflows/build-web-image.yml vendored Normal file

@@ -0,0 +1,43 @@
name: Build and Push WEB Image
on:
push:
branches:
- 'main'
- 'deploy/dev'
pull_request:
types: [synchronize, opened, reopened, ready_for_review]
jobs:
build-and-push:
runs-on: ubuntu-latest
if: github.event.pull_request.draft == false
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v2
with:
persist-credentials: false
- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build and push Docker image
shell: bash
env:
DOCKERHUB_USER: ${{ secrets.DOCKERHUB_USER }}
DOCKERHUB_TOKEN: ${{ secrets.DOCKERHUB_TOKEN }}
run: |
/bin/bash .github/workflows/build-web-image.sh
- name: Deploy to server
if: github.ref == 'refs/heads/deploy/dev'
uses: appleboy/ssh-action@v0.1.8
with:
host: ${{ secrets.SSH_HOST }}
username: ${{ secrets.SSH_USER }}
key: ${{ secrets.SSH_PRIVATE_KEY }}
script: |
${{ secrets.SSH_SCRIPT }}

.github/workflows/check_no_chinese_comments.py vendored Normal file

@@ -0,0 +1,35 @@
import os
import re
from zhon.hanzi import punctuation
def has_chinese_characters(text):
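# any CJK Unified Ideograph (U+4E00-U+9FFF) or Chinese punctuation mark from zhon counts as Chinese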
for char in text:
if '\u4e00' <= char <= '\u9fff' or char in punctuation:
return True
return False
def check_file_for_chinese_comments(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
for line_number, line in enumerate(file, start=1):
if has_chinese_characters(line):
print(f"Found Chinese characters in {file_path} on line {line_number}:")
print(line.strip())
return True
return False
def main():
has_chinese = False
excluded_files = ["model_template.py", 'stopwords.py', 'commands.py', 'indexing_runner.py']
for root, _, files in os.walk("."):
for file in files:
if file.endswith(".py") and file not in excluded_files:
file_path = os.path.join(root, file)
if check_file_for_chinese_comments(file_path):
has_chinese = True
if has_chinese:
raise Exception("Found Chinese characters in Python files. Please remove them.")
if __name__ == "__main__":
main()


@@ -0,0 +1,31 @@
name: Check for Chinese comments
on:
push:
branches:
- 'main'
pull_request:
branches:
- main
jobs:
check-chinese-comments:
runs-on: ubuntu-latest
steps:
- name: Check out repository
uses: actions/checkout@v2
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install zhon
- name: Run script to check for Chinese comments
run: |
python .github/workflows/check_no_chinese_comments.py

19
.github/workflows/flake8.yml vendored Normal file

@@ -0,0 +1,19 @@
name: PEP8 Check
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
pep8:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install flake8
run: pip install flake8
- name: Run flake8
run: flake8 --ignore=E501 .

149
.gitignore vendored Normal file

@@ -0,0 +1,149 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
.idea/
.DS_Store
.vscode
# Intellij IDEA Files
.idea/
.ideaDataSources/
api/.env
api/storage/*
docker/volumes/app/storage/privkeys/*
docker/volumes/db/data/*
docker/volumes/redis/data/*
docker/volumes/weaviate/*
sdks/python-client/build
sdks/python-client/dist
sdks/python-client/dify_client.egg-info

6
AUTHORS Normal file

@@ -0,0 +1,6 @@
nite-knite
goocarlos
crazywoola
iamjoel
idsong
takatost

56
CONTRIBUTING.md Normal file

@@ -0,0 +1,56 @@
# Contributing
Thanks for your interest in [Dify](https://dify.ai) and for wanting to contribute! Before you begin, read the
[code of conduct](https://github.com/langgenius/.github/blob/main/CODE_OF_CONDUCT.md) and check out the
[existing issues](https://github.com/langgenius/langgenius-gateway/issues).
This document describes how to set up your development environment to build and test [Dify](https://dify.ai).
### Install dependencies
You need to install and configure the following dependencies on your machine to build [Dify](https://dify.ai):
- [Git](http://git-scm.com/)
- [Docker](https://www.docker.com/)
- [Docker Compose](https://docs.docker.com/compose/install/)
- [Node.js v18.x (LTS)](http://nodejs.org)
- [npm](https://www.npmjs.com/) version 8.x.x or [Yarn](https://yarnpkg.com/)
- [Python](https://www.python.org/) version 3.10.x
## Local development
To set up a working development environment, fork the project git repository, install the backend and frontend dependencies using the proper package managers, and run the docker-compose stack.
### Fork the repository
You need to fork the [repository](https://github.com/langgenius/langgenius-gateway).
### Clone the repo
Clone your GitHub forked repository:
```
git clone git@github.com:<github_username>/langgenius-gateway.git
```
### Install backend
To learn how to install the backend application, please refer to the [Backend README](api/README.md).
### Install frontend
To learn how to install the frontend application, please refer to the [Frontend README](web/README.md).
### Visit dify in your browser
Finally, you can now visit [http://localhost:3000](http://localhost:3000) to view [Dify](https://dify.ai) in your local environment.
## Create a pull request
After making your changes, open a pull request (PR). Once you submit your pull request, others from the Dify team/community will review it with you.
Do you have an issue, like a merge conflict, or are you unsure how to open a pull request? Check out [GitHub's pull request tutorial](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests) on how to resolve merge conflicts and other issues. Once your PR has been merged, you will be proudly listed as a contributor in the [contributor chart](https://github.com/langgenius/langgenius-gateway/graphs/contributors).
## Community channels
Stuck somewhere? Have any questions? Join the [Discord Community Server](https://discord.gg/AhzKf7dNgk). We are here to help!

53
CONTRIBUTING_CN.md Normal file

@@ -0,0 +1,53 @@
# 贡献
感谢您对 [Dify](https://dify.ai) 的兴趣,并希望您能够做出贡献!在开始之前,请先阅读[行为准则](https://github.com/langgenius/.github/blob/main/CODE_OF_CONDUCT.md)并查看[现有问题](https://github.com/langgenius/dify/issues)。
本文档介绍了如何设置开发环境以构建和测试 [Dify](https://dify.ai)。
### 安装依赖项
您需要在计算机上安装和配置以下依赖项才能构建 [Dify](https://dify.ai):
- [Git](http://git-scm.com/)
- [Docker](https://www.docker.com/)
- [Docker Compose](https://docs.docker.com/compose/install/)
- [Node.js v18.x (LTS)](http://nodejs.org)
- [npm](https://www.npmjs.com/) 版本 8.x.x 或 [Yarn](https://yarnpkg.com/)
- [Python](https://www.python.org/) 版本 3.10.x
## 本地开发
要设置一个可工作的开发环境,只需 fork 项目的 git 存储库,并使用适当的软件包管理器安装后端和前端依赖项,然后创建并运行 docker-compose 堆栈。
### Fork存储库
您需要 fork [存储库](https://github.com/langgenius/dify)。
### 克隆存储库
克隆您在 GitHub 上 fork 的存储库:
```
git clone git@github.com:<github_username>/dify.git
```
### 安装后端
要了解如何安装后端应用程序,请参阅[后端 README](api/README.md)。
### 安装前端
要了解如何安装前端应用程序,请参阅[前端 README](web/README.md)。
### 在浏览器中访问 Dify
最后,您现在可以访问 [http://localhost:3000](http://localhost:3000) 在本地环境中查看 [Dify](https://dify.ai)。
## 创建拉取请求
在进行更改后,打开一个拉取请求(PR)。提交拉取请求后,Dify 团队/社区的其他人将与您一起审查它。
如果遇到问题,比如合并冲突或不知道如何打开拉取请求,请查看 GitHub 的[拉取请求教程](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests),了解如何解决合并冲突和其他问题。一旦您的 PR 被合并,您将自豪地被列为[贡献者表](https://github.com/langgenius/dify/graphs/contributors)中的一员。
## 社区渠道
遇到困难了吗?有任何问题吗? 加入 [Discord Community Server](https://discord.gg/AhzKf7dNgk),我们将为您提供帮助。

46
LICENSE Normal file

@@ -0,0 +1,46 @@
# Dify Open Source License
The Dify project uses a combination of the Apache License 2.0, MIT License, and an additional agreement to protect against direct competition with Dify Cloud services.
As a contributor, you should agree that your contributed code:
a. Might be subject to a more permissive open source license in the future.
b. Can be used for commercial purposes, such as Dify's cloud business.
The following components are open source under the MIT license, allowing you to build and develop applications based on them:
- WebApp elements, e.g., web/app/components/share
- Derived WebApp Template projects
The remaining parts of the project are open source under the Apache License 2.0.
With the Apache License 2.0, MIT License, and this supplementary agreement, anyone can freely use, modify, and distribute Dify, provided that:
- If you use Dify solely as a backend service for other applications, no authorization is needed for commercial or closed source purposes.
- If you wish to use Dify for commercial and closed source SaaS services similar to Dify Cloud, please contact us for authorization.
The interactive design of this product is protected by a design patent.
© 2023 LangGenius, Inc.
----------
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
----------
The MIT License
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

115
README.md Normal file

@@ -0,0 +1,115 @@
![](./images/describe-en.png)
<p align="center">
<a href="./README.md">English</a> |
<a href="./README_CN.md">简体中文</a>
</p>
[Website](http://dify.ai) • [Docs](https://docs.dify.ai) • [Twitter](https://twitter.com/dify_ai)
**Dify** is an easy-to-use LLMOps platform designed to empower more people to create sustainable, AI-native applications. With visual orchestration for various application types, Dify offers out-of-the-box, ready-to-use applications that can also serve as Backend-as-a-Service APIs. Unify your development process with one API for plugins and datasets integration, and streamline your operations using a single interface for prompt engineering, visual analytics, and continuous improvement.
Applications created with Dify include:
- Out-of-the-box websites supporting form mode and chat conversation mode
- A single API encompassing plugin capabilities, context enhancement, and more, saving you backend coding effort
- Visual data analysis, log review, and annotation for applications
Dify is compatible with LangChain, meaning we'll gradually support multiple LLMs. Currently supported:
- GPT-3 (text-davinci-003)
- GPT-3.5 Turbo (ChatGPT)
- GPT-4
## Use Cloud Services
Visit [Dify.ai](http://dify.ai)
## Install the Community Edition
### System Requirements
Before installing Dify, make sure your machine meets the following minimum system requirements:
- CPU >= 1 Core
- RAM >= 4GB
### Quick Start
The easiest way to start the Dify server is to run our [docker-compose.yml](docker/docker-compose.yaml) file. Before running the installation command, make sure that [Docker](https://docs.docker.com/get-docker/) and [Docker Compose](https://docs.docker.com/compose/install/) are installed on your machine:
```bash
cd docker
docker-compose up -d
```
After running, you can access the Dify console in your browser at [http://localhost](http://localhost) and begin the initialization process.
### Configuration
If you need to customize the configuration, please refer to the comments in our [docker-compose.yml](docker/docker-compose.yaml) file and manually set the environment configuration. After making the changes, please run `docker-compose up -d` again.
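For example, to rotate the application's `SECRET_KEY` (one of the variables documented in [api/.env.example](api/.env.example)), you could generate a new value with `openssl` and set it in the compose file before recreating the containers; a minimal sketch:
```bash
# generate a strong key, then set it as SECRET_KEY in docker/docker-compose.yaml
openssl rand -base64 42
docker-compose up -d
```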
## Roadmap
Features under development:
- **Datasets**, supporting more datasets, e.g. syncing content from Notion or webpages
We will support more datasets, including text, webpages, and even Notion content. Users can build AI applications based on their own data sources.
- **Plugins**, introducing ChatGPT Plugin-standard plugins for applications, or using Dify-produced plugins
We will release plugins complying with ChatGPT standard, or Dify's own plugins to enable more capabilities in applications.
- **Open-source models**, e.g. adopting Llama as a model provider or for further fine-tuning
We will work with excellent open-source models like Llama, by providing them as model options in our platform, or using them for further fine-tuning.
## Q&A
**Q: What can I do with Dify?**
A: Dify is a simple yet powerful LLM development and operations tool. You can use it to build commercial-grade applications or personal assistants. If you want to develop your own applications, Dify can save you the backend work of integrating with OpenAI and offers visual operations capabilities, allowing you to continuously improve and train your GPT model.
**Q: How do I use Dify to "train" my own model?**
A: A valuable application consists of Prompt Engineering, context enhancement, and Fine-tuning. We've created a hybrid programming approach combining Prompts with programming languages (similar to a template engine), making it easy to accomplish long-text embedding or capturing subtitles from a user-submitted YouTube video - all of which will be submitted as context for LLMs to process. We place great emphasis on application operability, with data generated by users during App usage available for analysis, annotation, and continuous training. Without the right tools, these steps can be time-consuming.
**Q: What do I need to prepare if I want to create my own application?**
A: We assume you already have an OpenAI API Key; if not, please register for one. If you already have some content that can serve as training context, that's great!
**Q: What interface languages are available?**
A: English and Chinese are currently supported, and you can contribute language packs to us.
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date)
## Contact Us
If you have any questions, suggestions, or partnership inquiries, feel free to contact us through the following channels:
- Submit an Issue or PR on our GitHub Repo
- Join the discussion in our [Discord](https://discord.gg/AhzKf7dNgk) Community
- Send an email to hello@dify.ai
We're eager to assist you and together create more fun and useful AI applications!
## Contributing
To ensure proper review, all code contributions - including those from contributors with direct commit access - must be submitted via pull requests and approved by the core development team prior to being merged.
We welcome all pull requests! If you'd like to help, check out the [Contribution Guide](CONTRIBUTING.md) for more information on how to get started.
## Security
To protect your privacy, please avoid posting security issues on GitHub. Instead, send your questions to security@dify.ai and we will provide you with a more detailed answer.
## Citation
This software uses the following open-source software:
- Chase, H. (2022). LangChain [Computer software]. https://github.com/hwchase17/langchain
- Liu, J. (2022). LlamaIndex [Computer software]. doi: 10.5281/zenodo.1234.
For more information, please refer to the official website or license text of the respective software.
## License
This repository is available under the [Dify Open Source License](LICENSE).

114
README_CN.md Normal file

@@ -0,0 +1,114 @@
![](./images/describe-cn.jpg)
<p align="center">
<a href="./README.md">English</a> |
<a href="./README_CN.md">简体中文</a>
</p>
[官方网站](http://dify.ai) • [文档](https://docs.dify.ai/v/zh-hans) • [Twitter](https://twitter.com/dify_ai)
**Dify** 是一个易用的 LLMOps 平台,旨在让更多人可以创建可持续运营的原生 AI 应用。Dify 提供多种类型应用的可视化编排,应用可开箱即用,也能以“后端即服务”的 API 提供服务。
通过 Dify 创建的应用包含了:
- 开箱即用的 Web 站点,支持表单模式和聊天对话模式
- 一套 API 即可包含插件、上下文增强等能力,替你省下了后端代码的编写工作
- 可视化的对应用进行数据分析,查阅日志或进行标注
Dify 兼容 LangChain,这意味着我们将逐步支持多种 LLMs,目前已支持:
- GPT-3 (text-davinci-003)
- GPT-3.5 Turbo (ChatGPT)
- GPT-4
## 使用云服务
访问 [Dify.ai](http://cloud.dify.ai)
## 安装社区版
### 系统要求
在安装 Dify 之前,请确保您的机器满足以下最低系统要求:
- CPU >= 1 Core
- RAM >= 4GB
### 快速启动
启动 Dify 服务器的最简单方法是运行我们的 [docker-compose.yml](docker/docker-compose.yaml) 文件。在运行安装命令之前,请确保您的机器上安装了 [Docker](https://docs.docker.com/get-docker/) 和 [Docker Compose](https://docs.docker.com/compose/install/):
```bash
cd docker
docker-compose up -d
```
运行后,可以在浏览器上访问 [http://localhost](http://localhost) 进入 Dify 控制台,并开始初始化操作。
### 配置
需要自定义配置,请参考我们的 [docker-compose.yml](docker/docker-compose.yaml) 文件中的注释,并手动设置环境配置,修改完毕后,请再次执行 `docker-compose up -d`
## Roadmap
我们正在开发中的功能:
- **数据集**,支持更多的数据集,例如同步 Notion 或网页的内容
我们将支持更多的数据集,包括文本、网页,甚至 Notion 内容。用户可以根据自己的数据源构建 AI 应用程序。
- **插件**,推出符合 ChatGPT 标准的插件,或使用 Dify 产生的插件
我们将发布符合 ChatGPT 标准的插件,或者 Dify 自己的插件,以在应用程序中启用更多功能。
- **开源模型**,例如采用 Llama 作为模型提供者,或进行进一步的微调
我们将与优秀的开源模型如 Llama 合作,通过在我们的平台中提供它们作为模型选项,或使用它们进行进一步的微调。
## Q&A
**Q: 我能用 Dify 做什么?**
A: Dify 是一个简单且能力丰富的 LLM 开发和运营工具。你可以用它搭建商用级应用、个人助理。如果你想自己开发应用,Dify 也能为你省下接入 OpenAI 的后端工作,使用我们逐步提供的可视化运营能力,你可以持续的改进和训练你的 GPT 模型。
**Q: 如何使用 Dify “训练”自己的模型?**
A: 一个有价值的应用由 Prompt Engineering、上下文增强和 Fine-tune 三个环节组成。我们创造了一种 Prompt 结合编程语言的 Hybrid 编程方式(类似一个模版引擎),你可以轻松的完成长文本嵌入,或抓取用户输入的一个 Youtube 视频的字幕——这些都将作为上下文提交给 LLMs 进行计算。我们十分注重应用的可运营性,你的用户在使用 App 期间产生的数据,可进行分析、标记和持续训练。以上环节如果没有好的工具支持,可能会消耗你大量的时间。
**Q: 如果要创建一个自己的应用,我需要准备什么?**
A: 我们假定你已经有了 OpenAI API Key如果没有请去注册一个。如果你已经有了一些内容可以作为训练上下文就太好了。
**Q: 提供哪些界面语言?**
A: 现已支持英文与中文,你可以为我们贡献语言包。
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=langgenius/dify&type=Date)](https://star-history.com/#langgenius/dify&Date)
## 联系我们
如果您有任何问题、建议或合作意向,欢迎通过以下方式联系我们:
- 在我们的 [GitHub Repo](https://github.com/langgenius/dify) 上提交 Issue 或 PR
- 在我们的 [Discord 社区](https://discord.gg/AhzKf7dNgk) 上加入讨论
- 发送邮件至 hello@dify.ai
## 贡献代码
为了确保正确审查,所有代码贡献 - 包括来自具有直接提交更改权限的贡献者 - 都必须提交 PR 请求并在合并分支之前得到核心开发人员的批准。
我们欢迎所有人提交 PR,如果您愿意提供帮助,可以在 [贡献指南](CONTRIBUTING_CN.md) 中了解有关如何为项目做出贡献的更多信息。
## 安全
为了保护您的隐私,请避免在 GitHub 上发布安全问题。发送问题至 security@dify.ai,我们将为您做更细致的解答。
## Citation
本软件使用了以下开源软件:
- Chase, H. (2022). LangChain [Computer software]. https://github.com/hwchase17/langchain
- Liu, J. (2022). LlamaIndex [Computer software]. doi: 10.5281/zenodo.1234.
更多信息,请参考相应软件的官方网站或许可证文本。
## License
本仓库遵循 [Dify Open Source License](LICENSE) 开源协议。

2
api/.dockerignore Normal file

@@ -0,0 +1,2 @@
.env
storage/privkeys/*

85
api/.env.example Normal file

@@ -0,0 +1,85 @@
# Server Edition
EDITION=SELF_HOSTED
# Your App secret key will be used for securely signing the session cookie
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`.
# Alternatively you can set it with the `SECRET_KEY` environment variable.
SECRET_KEY=
# Console API base URL
CONSOLE_URL=http://127.0.0.1:5001
# Service API base URL
API_URL=http://127.0.0.1:5001
# Web APP base URL
APP_URL=http://127.0.0.1:5001
# celery configuration
CELERY_BROKER_URL=redis://:difyai123456@localhost:6379/1
# redis configuration
REDIS_HOST=localhost
REDIS_PORT=6379
REDIS_PASSWORD=difyai123456
REDIS_DB=0
# PostgreSQL database configuration
DB_USERNAME=postgres
DB_PASSWORD=difyai123456
DB_HOST=localhost
DB_PORT=5432
DB_DATABASE=dify
# Storage configuration
# used to store uploaded files, private keys, ...
# storage type: local, s3
STORAGE_TYPE=local
STORAGE_LOCAL_PATH=storage
S3_ENDPOINT=https://your-bucket-name.storage.s3.cloudflare.com
S3_BUCKET_NAME=your-bucket-name
S3_ACCESS_KEY=your-access-key
S3_SECRET_KEY=your-secret-key
S3_REGION=your-region
# CORS configuration
WEB_API_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
CONSOLE_CORS_ALLOW_ORIGINS=http://127.0.0.1:3000,*
# Cookie configuration
COOKIE_HTTPONLY=true
COOKIE_SAMESITE=None
COOKIE_SECURE=true
# Session configuration
SESSION_PERMANENT=true
SESSION_USE_SIGNER=true
## support redis, sqlalchemy
SESSION_TYPE=redis
# session redis configuration
SESSION_REDIS_HOST=localhost
SESSION_REDIS_PORT=6379
SESSION_REDIS_PASSWORD=difyai123456
SESSION_REDIS_DB=2
# Vector database configuration, support: weaviate, qdrant
VECTOR_STORE=weaviate
# Weaviate configuration
WEAVIATE_ENDPOINT=http://localhost:8080
WEAVIATE_API_KEY=WVF5YThaHlkYwhGUSmCRgsX3tD5ngdN8pkih
WEAVIATE_GRPC_ENABLED=false
# Qdrant configuration, use `path:` prefix for local mode or `https://your-qdrant-cluster-url.qdrant.io` for remote mode
QDRANT_URL=path:storage/qdrant
QDRANT_API_KEY=your-qdrant-api-key
# Sentry configuration
SENTRY_DSN=
# DEBUG
DEBUG=false
SQLALCHEMY_ECHO=false

28
api/Dockerfile Normal file

@@ -0,0 +1,28 @@
FROM langgenius/base:1.0.0-bullseye-slim as langgenius-api
LABEL maintainer="takatost@gmail.com"
ENV FLASK_APP app.py
ENV EDITION SELF_HOSTED
ENV DEPLOY_ENV PRODUCTION
ENV CONSOLE_URL http://127.0.0.1:5001
ENV API_URL http://127.0.0.1:5001
ENV APP_URL http://127.0.0.1:5001
EXPOSE 5001
WORKDIR /app/api
COPY requirements.txt /app/api/requirements.txt
RUN pip install -r requirements.txt
COPY . /app/api/
COPY docker/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ARG COMMIT_SHA
ENV COMMIT_SHA ${COMMIT_SHA}
ENTRYPOINT ["/entrypoint.sh"]

35
api/README.md Normal file

@@ -0,0 +1,35 @@
# Dify Backend API
## Usage
1. Start the docker-compose stack
The backend requires some middleware, including PostgreSQL, Redis, and Weaviate, which can be started together using `docker-compose`.
```bash
cd ../docker
docker-compose -f docker-compose.middleware.yaml up -d
cd ../api
```
2. Copy `.env.example` to `.env`
3. Generate a `SECRET_KEY` in the `.env` file.
```bash
openssl rand -base64 42
```
4. Install dependencies
```bash
pip install -r requirements.txt
```
5. Run migrations
Before the first launch, migrate the database to the latest version.
```bash
flask db upgrade
```
6. Start the backend:
```bash
flask run --host 0.0.0.0 --port=5001 --debug
```
7. Set up your application by visiting http://localhost:5001/console/api/setup or other APIs...
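A quick way to verify the server is running is the `/health` endpoint registered in `app.py`:
```bash
curl http://localhost:5001/health
# {"status": "ok", "version": "0.2.0"}
```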

222
api/app.py Normal file

@@ -0,0 +1,222 @@
# -*- coding:utf-8 -*-
import os
if not os.environ.get("DEBUG") or os.environ.get("DEBUG").lower() != 'true':
from gevent import monkey
monkey.patch_all()
import logging
import json
import threading
from flask import Flask, request, Response, session
import flask_login
from flask_cors import CORS
from extensions import ext_session, ext_celery, ext_sentry, ext_redis, ext_login, ext_vector_store, ext_migrate, \
ext_database, ext_storage
from extensions.ext_database import db
from extensions.ext_login import login_manager
# DO NOT REMOVE BELOW
from models import model, account, dataset, web, task
from events import event_handlers
# DO NOT REMOVE ABOVE
import core
from config import Config, CloudEditionConfig
from commands import register_commands
from models.account import TenantAccountJoin
from models.model import Account, EndUser, App
import warnings
warnings.simplefilter("ignore", ResourceWarning)
class DifyApp(Flask):
pass
# -------------
# Configuration
# -------------
config_type = os.getenv('EDITION', default='SELF_HOSTED') # ce edition first
# ----------------------------
# Application Factory Function
# ----------------------------
def create_app(test_config=None) -> Flask:
app = DifyApp(__name__)
if test_config:
app.config.from_object(test_config)
else:
if config_type == "CLOUD":
app.config.from_object(CloudEditionConfig())
else:
app.config.from_object(Config())
app.secret_key = app.config['SECRET_KEY']
logging.basicConfig(level=app.config.get('LOG_LEVEL', 'INFO'))
initialize_extensions(app)
register_blueprints(app)
register_commands(app)
core.init_app(app)
return app
def initialize_extensions(app):
# Since the application instance is now created, pass it to each Flask
# extension instance to bind it to the Flask application instance (app)
ext_database.init_app(app)
ext_migrate.init(app, db)
ext_redis.init_app(app)
ext_vector_store.init_app(app)
ext_storage.init_app(app)
ext_celery.init_app(app)
ext_session.init_app(app)
ext_login.init_app(app)
ext_sentry.init_app(app)
# Flask-Login configuration
@login_manager.user_loader
def load_user(user_id):
"""Load user based on the user_id."""
if request.blueprint == 'console':
# Check if the user_id contains a dot, indicating the old format
if '.' in user_id:
tenant_id, account_id = user_id.split('.')
else:
account_id = user_id
account = db.session.query(Account).filter(Account.id == account_id).first()
if account:
workspace_id = session.get('workspace_id')
if workspace_id:
tenant_account_join = db.session.query(TenantAccountJoin).filter(
TenantAccountJoin.account_id == account.id,
TenantAccountJoin.tenant_id == workspace_id
).first()
if not tenant_account_join:
tenant_account_join = db.session.query(TenantAccountJoin).filter(
TenantAccountJoin.account_id == account.id).first()
if tenant_account_join:
account.current_tenant_id = tenant_account_join.tenant_id
session['workspace_id'] = account.current_tenant_id
else:
account.current_tenant_id = workspace_id
else:
tenant_account_join = db.session.query(TenantAccountJoin).filter(
TenantAccountJoin.account_id == account.id).first()
if tenant_account_join:
account.current_tenant_id = tenant_account_join.tenant_id
session['workspace_id'] = account.current_tenant_id
# Log in the user with the updated user_id
flask_login.login_user(account, remember=True)
return account
else:
return None
@login_manager.unauthorized_handler
def unauthorized_handler():
"""Handle unauthorized requests."""
return Response(json.dumps({
'code': 'unauthorized',
'message': "Unauthorized."
}), status=401, content_type="application/json")
# register blueprint routers
def register_blueprints(app):
from controllers.service_api import bp as service_api_bp
from controllers.web import bp as web_bp
from controllers.console import bp as console_app_bp
app.register_blueprint(service_api_bp)
CORS(web_bp,
resources={
r"/*": {"origins": app.config['WEB_API_CORS_ALLOW_ORIGINS']}},
supports_credentials=True,
allow_headers=['Content-Type', 'Authorization'],
methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH'],
expose_headers=['X-Version', 'X-Env']
)
app.register_blueprint(web_bp)
CORS(console_app_bp,
resources={
r"/*": {"origins": app.config['CONSOLE_CORS_ALLOW_ORIGINS']}},
supports_credentials=True,
allow_headers=['Content-Type', 'Authorization'],
methods=['GET', 'PUT', 'POST', 'DELETE', 'OPTIONS', 'PATCH'],
expose_headers=['X-Version', 'X-Env']
)
app.register_blueprint(console_app_bp)
# create app
app = create_app()
celery = app.extensions["celery"]
if app.config['TESTING']:
print("App is running in TESTING mode")
@app.after_request
def after_request(response):
"""Add Version headers to the response."""
response.headers.add('X-Version', app.config['CURRENT_VERSION'])
response.headers.add('X-Env', app.config['DEPLOY_ENV'])
return response
@app.route('/health')
def health():
return Response(json.dumps({
'status': 'ok',
'version': app.config['CURRENT_VERSION']
}), status=200, content_type="application/json")
@app.route('/threads')
def threads():
num_threads = threading.active_count()
threads = threading.enumerate()
thread_list = []
for thread in threads:
thread_name = thread.name
thread_id = thread.ident
is_alive = thread.is_alive()
thread_list.append({
'name': thread_name,
'id': thread_id,
'is_alive': is_alive
})
return {
'thread_num': num_threads,
'threads': thread_list
}
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5001)

160
api/commands.py Normal file

@@ -0,0 +1,160 @@
import datetime
import json
import random
import string
import click
from libs.password import password_pattern, valid_password, hash_password
from libs.helper import email as email_validate
from extensions.ext_database import db
from models.account import InvitationCode
from models.model import Account, AppModelConfig, ApiToken, Site, App, RecommendedApp
import secrets
import base64
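# These commands attach to the Flask CLI via register_commands() below,
# e.g.: flask reset-password --email user@example.com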
@click.command('reset-password', help='Reset the account password.')
@click.option('--email', prompt=True, help='The email address of the account whose password you need to reset')
@click.option('--new-password', prompt=True, help='the new password.')
@click.option('--password-confirm', prompt=True, help='the new password confirm.')
def reset_password(email, new_password, password_confirm):
if str(new_password).strip() != str(password_confirm).strip():
click.echo(click.style('Sorry, the two passwords do not match.', fg='red'))
return
account = db.session.query(Account). \
filter(Account.email == email). \
one_or_none()
if not account:
click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
return
try:
valid_password(new_password)
except Exception:
click.echo(
click.style('Sorry, the password must match the pattern {}.'.format(password_pattern), fg='red'))
return
# generate password salt
salt = secrets.token_bytes(16)
base64_salt = base64.b64encode(salt).decode()
# encrypt password with salt
password_hashed = hash_password(new_password, salt)
base64_password_hashed = base64.b64encode(password_hashed).decode()
account.password = base64_password_hashed
account.password_salt = base64_salt
db.session.commit()
click.echo(click.style('Congratulations! The password has been reset.', fg='green'))
@click.command('reset-email', help='Reset the account email.')
@click.option('--email', prompt=True, help='The old email address of the account whose email you need to reset')
@click.option('--new-email', prompt=True, help='the new email.')
@click.option('--email-confirm', prompt=True, help='the new email confirm.')
def reset_email(email, new_email, email_confirm):
if str(new_email).strip() != str(email_confirm).strip():
click.echo(click.style('Sorry, new email and confirm email do not match.', fg='red'))
return
account = db.session.query(Account). \
filter(Account.email == email). \
one_or_none()
if not account:
click.echo(click.style('Sorry, the account [{}] does not exist.'.format(email), fg='red'))
return
try:
email_validate(new_email)
except Exception:
click.echo(
click.style('Sorry, {} is not a valid email.'.format(new_email), fg='red'))
return
account.email = new_email
db.session.commit()
click.echo(click.style('Congratulations! The email has been reset.', fg='green'))
@click.command('generate-invitation-codes', help='Generate invitation codes.')
@click.option('--batch', help='The batch of invitation codes.')
@click.option('--count', prompt=True, help='Invitation codes count.')
def generate_invitation_codes(batch, count):
if not batch:
now = datetime.datetime.now()
batch = now.strftime('%Y%m%d%H%M%S')
if not count or int(count) <= 0:
click.echo(click.style('Sorry, the count must be greater than 0.', fg='red'))
return
count = int(count)
click.echo('Start generating {} invitation codes for batch {}.'.format(count, batch))
codes = ''
for i in range(count):
code = generate_invitation_code()
invitation_code = InvitationCode(
code=code,
batch=batch
)
db.session.add(invitation_code)
click.echo(code)
codes += code + "\n"
db.session.commit()
filename = 'storage/invitation-codes-{}.txt'.format(batch)
with open(filename, 'w') as f:
f.write(codes)
click.echo(click.style(
'Congratulations! Generated {} invitation codes for batch {} and saved to the file \'{}\''.format(count, batch,
filename),
fg='green'))
def generate_invitation_code():
code = generate_upper_string()
while db.session.query(InvitationCode).filter(InvitationCode.code == code).count() > 0:
code = generate_upper_string()
return code
def generate_upper_string():
letters_digits = string.ascii_uppercase + string.digits
result = ""
for i in range(8):
result += random.choice(letters_digits)
return result
@click.command('gen-recommended-apps', help='Generate recommended apps.')
def generate_recommended_apps():
print('Generating recommended app data...')
apps = App.query.all()
for app in apps:
recommended_app = RecommendedApp(
app_id=app.id,
description={
'en': 'Description for ' + app.name,
'zh': '描述 ' + app.name
},
copyright='Copyright ' + str(random.randint(1990, 2020)),
privacy_policy='https://privacypolicy.example.com',
category=random.choice(['Games', 'News', 'Music', 'Sports']),
position=random.randint(1, 100),
install_count=random.randint(100, 100000)
)
db.session.add(recommended_app)
db.session.commit()
print('Done!')
def register_commands(app):
app.cli.add_command(reset_password)
app.cli.add_command(reset_email)
app.cli.add_command(generate_invitation_codes)
app.cli.add_command(generate_recommended_apps)

200
api/config.py Normal file

@@ -0,0 +1,200 @@
# -*- coding:utf-8 -*-
import os
from datetime import timedelta
import dotenv
from extensions.ext_database import db
from extensions.ext_redis import redis_client
dotenv.load_dotenv()
DEFAULTS = {
'COOKIE_HTTPONLY': 'True',
'COOKIE_SECURE': 'True',
'COOKIE_SAMESITE': 'None',
'DB_USERNAME': 'postgres',
'DB_PASSWORD': '',
'DB_HOST': 'localhost',
'DB_PORT': '5432',
'DB_DATABASE': 'dify',
'REDIS_HOST': 'localhost',
'REDIS_PORT': '6379',
'REDIS_DB': '0',
'SESSION_REDIS_HOST': 'localhost',
'SESSION_REDIS_PORT': '6379',
'SESSION_REDIS_DB': '2',
'OAUTH_REDIRECT_PATH': '/console/api/oauth/authorize',
'OAUTH_REDIRECT_INDEX_PATH': '/',
'CONSOLE_URL': 'https://cloud.dify.ai',
'API_URL': 'https://api.dify.ai',
'APP_URL': 'https://udify.app',
'STORAGE_TYPE': 'local',
'STORAGE_LOCAL_PATH': 'storage',
'CHECK_UPDATE_URL': 'https://updates.dify.ai',
'SESSION_TYPE': 'sqlalchemy',
'SESSION_PERMANENT': 'True',
'SESSION_USE_SIGNER': 'True',
'DEPLOY_ENV': 'PRODUCTION',
'SQLALCHEMY_POOL_SIZE': 30,
'SQLALCHEMY_ECHO': 'False',
'SENTRY_TRACES_SAMPLE_RATE': 1.0,
'SENTRY_PROFILES_SAMPLE_RATE': 1.0,
'WEAVIATE_GRPC_ENABLED': 'True',
'CELERY_BACKEND': 'database',
'PDF_PREVIEW': 'True',
'LOG_LEVEL': 'INFO',
}
def get_env(key):
return os.environ.get(key, DEFAULTS.get(key))
def get_bool_env(key):
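# truthy only for the literal string 'true' (case-insensitive); assumes the key resolves via DEFAULTS or the environment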
return get_env(key).lower() == 'true'
def get_cors_allow_origins(env, default):
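# parse a comma-separated origin list from the env var, falling back to the given default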
cors_allow_origins = []
if get_env(env):
for origin in get_env(env).split(','):
cors_allow_origins.append(origin)
else:
cors_allow_origins = [default]
return cors_allow_origins
class Config:
"""Application configuration class."""
def __init__(self):
# app settings
self.CONSOLE_URL = get_env('CONSOLE_URL')
self.API_URL = get_env('API_URL')
self.APP_URL = get_env('APP_URL')
self.CURRENT_VERSION = "0.2.0"
self.COMMIT_SHA = get_env('COMMIT_SHA')
self.EDITION = "SELF_HOSTED"
self.DEPLOY_ENV = get_env('DEPLOY_ENV')
self.TESTING = False
self.LOG_LEVEL = get_env('LOG_LEVEL')
self.PDF_PREVIEW = get_bool_env('PDF_PREVIEW')
# Your App secret key will be used for securely signing the session cookie
# Make sure you are changing this key for your deployment with a strong key.
# You can generate a strong key using `openssl rand -base64 42`.
# Alternatively you can set it with the `SECRET_KEY` environment variable.
self.SECRET_KEY = get_env('SECRET_KEY')
# cookie settings
self.REMEMBER_COOKIE_HTTPONLY = get_bool_env('COOKIE_HTTPONLY')
self.SESSION_COOKIE_HTTPONLY = get_bool_env('COOKIE_HTTPONLY')
self.REMEMBER_COOKIE_SAMESITE = get_env('COOKIE_SAMESITE')
self.SESSION_COOKIE_SAMESITE = get_env('COOKIE_SAMESITE')
self.REMEMBER_COOKIE_SECURE = get_bool_env('COOKIE_SECURE')
self.SESSION_COOKIE_SECURE = get_bool_env('COOKIE_SECURE')
self.PERMANENT_SESSION_LIFETIME = timedelta(days=7)
# session settings, only support sqlalchemy, redis
self.SESSION_TYPE = get_env('SESSION_TYPE')
self.SESSION_PERMANENT = get_bool_env('SESSION_PERMANENT')
self.SESSION_USE_SIGNER = get_bool_env('SESSION_USE_SIGNER')
# redis settings
self.REDIS_HOST = get_env('REDIS_HOST')
self.REDIS_PORT = get_env('REDIS_PORT')
self.REDIS_PASSWORD = get_env('REDIS_PASSWORD')
self.REDIS_DB = get_env('REDIS_DB')
# session redis settings
self.SESSION_REDIS_HOST = get_env('SESSION_REDIS_HOST')
self.SESSION_REDIS_PORT = get_env('SESSION_REDIS_PORT')
self.SESSION_REDIS_PASSWORD = get_env('SESSION_REDIS_PASSWORD')
self.SESSION_REDIS_DB = get_env('SESSION_REDIS_DB')
# storage settings
self.STORAGE_TYPE = get_env('STORAGE_TYPE')
self.STORAGE_LOCAL_PATH = get_env('STORAGE_LOCAL_PATH')
self.S3_ENDPOINT = get_env('S3_ENDPOINT')
self.S3_BUCKET_NAME = get_env('S3_BUCKET_NAME')
self.S3_ACCESS_KEY = get_env('S3_ACCESS_KEY')
self.S3_SECRET_KEY = get_env('S3_SECRET_KEY')
self.S3_REGION = get_env('S3_REGION')
# vector store settings, only support weaviate, qdrant
self.VECTOR_STORE = get_env('VECTOR_STORE')
# weaviate settings
self.WEAVIATE_ENDPOINT = get_env('WEAVIATE_ENDPOINT')
self.WEAVIATE_API_KEY = get_env('WEAVIATE_API_KEY')
self.WEAVIATE_GRPC_ENABLED = get_bool_env('WEAVIATE_GRPC_ENABLED')
# qdrant settings
self.QDRANT_URL = get_env('QDRANT_URL')
self.QDRANT_API_KEY = get_env('QDRANT_API_KEY')
# cors settings
self.CONSOLE_CORS_ALLOW_ORIGINS = get_cors_allow_origins(
'CONSOLE_CORS_ALLOW_ORIGINS', self.CONSOLE_URL)
self.WEB_API_CORS_ALLOW_ORIGINS = get_cors_allow_origins(
'WEB_API_CORS_ALLOW_ORIGINS', '*')
# sentry settings
self.SENTRY_DSN = get_env('SENTRY_DSN')
self.SENTRY_TRACES_SAMPLE_RATE = float(get_env('SENTRY_TRACES_SAMPLE_RATE'))
self.SENTRY_PROFILES_SAMPLE_RATE = float(get_env('SENTRY_PROFILES_SAMPLE_RATE'))
# check update url
self.CHECK_UPDATE_URL = get_env('CHECK_UPDATE_URL')
# database settings
db_credentials = {
key: get_env(key) for key in
['DB_USERNAME', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT', 'DB_DATABASE']
}
self.SQLALCHEMY_DATABASE_URI = f"postgresql://{db_credentials['DB_USERNAME']}:{db_credentials['DB_PASSWORD']}@{db_credentials['DB_HOST']}:{db_credentials['DB_PORT']}/{db_credentials['DB_DATABASE']}"
self.SQLALCHEMY_ENGINE_OPTIONS = {'pool_size': int(get_env('SQLALCHEMY_POOL_SIZE'))}
self.SQLALCHEMY_ECHO = get_bool_env('SQLALCHEMY_ECHO')
# celery settings
self.CELERY_BROKER_URL = get_env('CELERY_BROKER_URL')
self.CELERY_BACKEND = get_env('CELERY_BACKEND')
self.CELERY_RESULT_BACKEND = 'db+{}'.format(self.SQLALCHEMY_DATABASE_URI) \
if self.CELERY_BACKEND == 'database' else self.CELERY_BROKER_URL
# hosted provider credentials
self.OPENAI_API_KEY = get_env('OPENAI_API_KEY')
class CloudEditionConfig(Config):
def __init__(self):
super().__init__()
self.EDITION = "CLOUD"
self.GITHUB_CLIENT_ID = get_env('GITHUB_CLIENT_ID')
self.GITHUB_CLIENT_SECRET = get_env('GITHUB_CLIENT_SECRET')
self.GOOGLE_CLIENT_ID = get_env('GOOGLE_CLIENT_ID')
self.GOOGLE_CLIENT_SECRET = get_env('GOOGLE_CLIENT_SECRET')
self.OAUTH_REDIRECT_PATH = get_env('OAUTH_REDIRECT_PATH')
class TestConfig(Config):
def __init__(self):
super().__init__()
self.EDITION = "SELF_HOSTED"
self.TESTING = True
db_credentials = {
key: get_env(key) for key in ['DB_USERNAME', 'DB_PASSWORD', 'DB_HOST', 'DB_PORT']
}
# use a different database for testing: dify_test
self.SQLALCHEMY_DATABASE_URI = f"postgresql://{db_credentials['DB_USERNAME']}:{db_credentials['DB_PASSWORD']}@{db_credentials['DB_HOST']}:{db_credentials['DB_PORT']}/dify_test"

api/constants/model_template.py Normal file

@@ -0,0 +1,322 @@
import json
from models.model import AppModelConfig, App
model_templates = {
# completion default mode
'completion_default': {
'app': {
'mode': 'completion',
'enable_site': True,
'enable_api': True,
'is_demo': False,
'api_rpm': 0,
'api_rph': 0,
'status': 'normal'
},
'model_config': {
'provider': 'openai',
'model_id': 'text-davinci-003',
'configs': {
'prompt_template': '',
'prompt_variables': [],
'completion_params': {
'max_token': 512,
'temperature': 1,
'top_p': 1,
'presence_penalty': 0,
'frequency_penalty': 0,
}
},
'model': json.dumps({
"provider": "openai",
"name": "text-davinci-003",
"completion_params": {
"max_tokens": 512,
"temperature": 1,
"top_p": 1,
"presence_penalty": 0,
"frequency_penalty": 0
}
})
}
},
# chat default mode
'chat_default': {
'app': {
'mode': 'chat',
'enable_site': True,
'enable_api': True,
'is_demo': False,
'api_rpm': 0,
'api_rph': 0,
'status': 'normal'
},
'model_config': {
'provider': 'openai',
'model_id': 'gpt-3.5-turbo',
'configs': {
'prompt_template': '',
'prompt_variables': [],
'completion_params': {
'max_token': 512,
'temperature': 1,
'top_p': 1,
'presence_penalty': 0,
'frequency_penalty': 0,
}
},
'model': json.dumps({
"provider": "openai",
"name": "gpt-3.5-turbo",
"completion_params": {
"max_tokens": 512,
"temperature": 1,
"top_p": 1,
"presence_penalty": 0,
"frequency_penalty": 0
}
})
}
},
}
demo_model_templates = {
'en-US': [
{
'name': 'Translation Assistant',
'icon': '',
'icon_background': '',
'description': 'A multilingual translator that provides translation capabilities in multiple languages, translating user input into the language they need.',
'mode': 'completion',
'model_config': AppModelConfig(
provider='openai',
model_id='text-davinci-003',
configs={
'prompt_template': "Please translate the following text into {{target_language}}:\n",
'prompt_variables': [
{
"key": "target_language",
"name": "Target Language",
"description": "The language you want to translate into.",
"type": "select",
"default": "Chinese",
'options': [
'Chinese',
'English',
'Japanese',
'French',
'Russian',
'German',
'Spanish',
'Korean',
'Italian',
]
}
],
'completion_params': {
'max_token': 1000,
'temperature': 0,
'top_p': 0,
'presence_penalty': 0.1,
'frequency_penalty': 0.1,
}
},
opening_statement='',
suggested_questions=None,
pre_prompt="Please translate the following text into {{target_language}}:\n",
model=json.dumps({
"provider": "openai",
"name": "text-davinci-003",
"completion_params": {
"max_tokens": 1000,
"temperature": 0,
"top_p": 0,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
}),
user_input_form=json.dumps([
{
"select": {
"label": "Target Language",
"variable": "target_language",
"description": "The language you want to translate into.",
"default": "Chinese",
"required": True,
'options': [
'Chinese',
'English',
'Japanese',
'French',
'Russian',
'German',
'Spanish',
'Korean',
'Italian',
]
}
}
])
)
},
{
'name': 'AI Front-end Interviewer',
'icon': '',
'icon_background': '',
'description': 'A simulated front-end interviewer that tests the skill level of front-end development through questioning.',
'mode': 'chat',
'model_config': AppModelConfig(
provider='openai',
model_id='gpt-3.5-turbo',
configs={
'introduction': 'Hi, welcome to our interview. I am the interviewer for this technology company, and I will test your web front-end development skills. Next, I will ask you some technical questions. Please answer them as thoroughly as possible. ',
'prompt_template': "You will play the role of an interviewer for a technology company, examining the user's web front-end development skills and posing 5-10 sharp technical questions.\n\nPlease note:\n- Only ask one question at a time.\n- After the user answers a question, ask the next question directly, without trying to correct any mistakes made by the candidate.\n- If you think the user has not answered correctly for several consecutive questions, ask fewer questions.\n- After asking the last question, you can ask this question: Why did you leave your last job? After the user answers this question, please express your understanding and support.\n",
'prompt_variables': [],
'completion_params': {
'max_token': 300,
'temperature': 0.8,
'top_p': 0.9,
'presence_penalty': 0.1,
'frequency_penalty': 0.1,
}
},
opening_statement='Hi, welcome to our interview. I am the interviewer for this technology company, and I will test your web front-end development skills. Next, I will ask you some technical questions. Please answer them as thoroughly as possible. ',
suggested_questions=None,
pre_prompt="You will play the role of an interviewer for a technology company, examining the user's web front-end development skills and posing 5-10 sharp technical questions.\n\nPlease note:\n- Only ask one question at a time.\n- After the user answers a question, ask the next question directly, without trying to correct any mistakes made by the candidate.\n- If you think the user has not answered correctly for several consecutive questions, ask fewer questions.\n- After asking the last question, you can ask this question: Why did you leave your last job? After the user answers this question, please express your understanding and support.\n",
model=json.dumps({
"provider": "openai",
"name": "gpt-3.5-turbo",
"completion_params": {
"max_tokens": 300,
"temperature": 0.8,
"top_p": 0.9,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
}),
user_input_form=None
)
}
],
'zh-Hans': [
{
'name': '翻译助手',
'icon': '',
'icon_background': '',
'description': '一个多语言翻译器,提供多种语言翻译能力,将用户输入的文本翻译成他们需要的语言。',
'mode': 'completion',
'model_config': AppModelConfig(
provider='openai',
model_id='text-davinci-003',
configs={
'prompt_template': "请将以下文本翻译为{{target_language}}:\n",
'prompt_variables': [
{
"key": "target_language",
"name": "目标语言",
"description": "翻译的目标语言",
"type": "select",
"default": "中文",
"options": [
"中文",
"英文",
"日语",
"法语",
"俄语",
"德语",
"西班牙语",
"韩语",
"意大利语",
]
}
],
'completion_params': {
'max_token': 1000,
'temperature': 0,
'top_p': 0,
'presence_penalty': 0.1,
'frequency_penalty': 0.1,
}
},
opening_statement='',
suggested_questions=None,
pre_prompt="请将以下文本翻译为{{target_language}}:\n",
model=json.dumps({
"provider": "openai",
"name": "text-davinci-003",
"completion_params": {
"max_tokens": 1000,
"temperature": 0,
"top_p": 0,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
}),
user_input_form=json.dumps([
{
"select": {
"label": "目标语言",
"variable": "target_language",
"description": "翻译的目标语言",
"default": "中文",
"required": True,
'options': [
"中文",
"英文",
"日语",
"法语",
"俄语",
"德语",
"西班牙语",
"韩语",
"意大利语",
]
}
}
])
)
},
{
'name': 'AI 前端面试官',
'icon': '',
'icon_background': '',
'description': '一个模拟的前端面试官,通过提问的方式对前端开发的技能水平进行检验。',
'mode': 'chat',
'model_config': AppModelConfig(
provider='openai',
model_id='gpt-3.5-turbo',
configs={
'introduction': '你好,欢迎来参加我们的面试,我是这家科技公司的面试官,我将考察你的 Web 前端开发技能。接下来我会向您提出一些技术问题,请您尽可能详尽地回答。',
'prompt_template': "你将扮演一个科技公司的面试官,考察用户作为候选人的 Web 前端开发水平,提出 5-10 个犀利的技术问题。\n\n请注意:\n- 每次只问一个问题\n- 用户回答问题后请直接问下一个问题,而不要试图纠正候选人的错误;\n- 如果你认为用户连续几次回答的都不对,就少问一点;\n- 问完最后一个问题后,你可以问这样一个问题:上一份工作为什么离职?用户回答该问题后,请表示理解与支持。\n",
'prompt_variables': [],
'completion_params': {
'max_token': 300,
'temperature': 0.8,
'top_p': 0.9,
'presence_penalty': 0.1,
'frequency_penalty': 0.1,
}
},
opening_statement='你好,欢迎来参加我们的面试,我是这家科技公司的面试官,我将考察你的 Web 前端开发技能。接下来我会向您提出一些技术问题,请您尽可能详尽地回答。',
suggested_questions=None,
pre_prompt="你将扮演一个科技公司的面试官,考察用户作为候选人的 Web 前端开发水平,提出 5-10 个犀利的技术问题。\n\n请注意:\n- 每次只问一个问题\n- 用户回答问题后请直接问下一个问题,而不要试图纠正候选人的错误;\n- 如果你认为用户连续几次回答的都不对,就少问一点;\n- 问完最后一个问题后,你可以问这样一个问题:上一份工作为什么离职?用户回答该问题后,请表示理解与支持。\n",
model=json.dumps({
"provider": "openai",
"name": "gpt-3.5-turbo",
"completion_params": {
"max_tokens": 300,
"temperature": 0.8,
"top_p": 0.9,
"presence_penalty": 0.1,
"frequency_penalty": 0.1
}
}),
user_input_form=None
)
}
],
}


@@ -0,0 +1,4 @@
# -*- coding:utf-8 -*-

api/controllers/console/__init__.py Normal file

@@ -0,0 +1,20 @@
from flask import Blueprint
from libs.external_api import ExternalApi
bp = Blueprint('console', __name__, url_prefix='/console/api')
api = ExternalApi(bp)
# Import app controllers
from .app import app, site, explore, completion, model_config, statistic, conversation, message
# Import auth controllers
from .auth import login, oauth
# Import datasets controllers
from .datasets import datasets, datasets_document, datasets_segments, file, hit_testing
# Import other controllers
from . import setup, version, apikey
from .workspace import workspace, members, providers, account

api/controllers/console/apikey.py Normal file

@@ -0,0 +1,175 @@
from flask_login import login_required, current_user
import flask_restful
from flask_restful import Resource, fields, marshal_with
from werkzeug.exceptions import Forbidden
from extensions.ext_database import db
from models.model import App, ApiToken
from models.dataset import Dataset
from . import api
from .setup import setup_required
from .wraps import account_initialization_required
from libs.helper import TimestampField
api_key_fields = {
'id': fields.String,
'type': fields.String,
'token': fields.String,
'last_used_at': TimestampField,
'created_at': TimestampField
}
api_key_list = {
'data': fields.List(fields.Nested(api_key_fields), attribute="items")
}
def _get_resource(resource_id, tenant_id, resource_model):
resource = resource_model.query.filter_by(
id=resource_id, tenant_id=tenant_id
).first()
if resource is None:
flask_restful.abort(
404, message=f"{resource_model.__name__} not found.")
return resource
class BaseApiKeyListResource(Resource):
method_decorators = [account_initialization_required, login_required, setup_required]
resource_type = None
resource_model = None
resource_id_field = None
token_prefix = None
max_keys = 10
@marshal_with(api_key_list)
def get(self, resource_id):
resource_id = str(resource_id)
_get_resource(resource_id, current_user.current_tenant_id,
self.resource_model)
keys = db.session.query(ApiToken). \
filter(ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id). \
all()
return {"items": keys}
@marshal_with(api_key_fields)
def post(self, resource_id):
resource_id = str(resource_id)
_get_resource(resource_id, current_user.current_tenant_id,
self.resource_model)
# The current user's role in the tenant-account join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
current_key_count = db.session.query(ApiToken). \
filter(ApiToken.type == self.resource_type, getattr(ApiToken, self.resource_id_field) == resource_id). \
count()
if current_key_count >= self.max_keys:
flask_restful.abort(
400,
message=f"Cannot create more than {self.max_keys} API keys for this resource type.",
code='max_keys_exceeded'
)
key = ApiToken.generate_api_key(self.token_prefix, 24)
api_token = ApiToken()
setattr(api_token, self.resource_id_field, resource_id)
api_token.token = key
api_token.type = self.resource_type
db.session.add(api_token)
db.session.commit()
return api_token, 201
class BaseApiKeyResource(Resource):
method_decorators = [account_initialization_required, login_required, setup_required]
resource_type = None
resource_model = None
resource_id_field = None
def delete(self, resource_id, api_key_id):
resource_id = str(resource_id)
api_key_id = str(api_key_id)
_get_resource(resource_id, current_user.current_tenant_id,
self.resource_model)
# The role of the current user in the tenant-account (ta) join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
key = db.session.query(ApiToken). \
filter(getattr(ApiToken, self.resource_id_field) == resource_id, ApiToken.type == self.resource_type, ApiToken.id == api_key_id). \
first()
if key is None:
flask_restful.abort(404, message='API key not found')
db.session.query(ApiToken).filter(ApiToken.id == api_key_id).delete()
db.session.commit()
return {'result': 'success'}, 204
class AppApiKeyListResource(BaseApiKeyListResource):
def after_request(self, resp):
resp.headers['Access-Control-Allow-Origin'] = '*'
resp.headers['Access-Control-Allow-Credentials'] = 'true'
return resp
resource_type = 'app'
resource_model = App
resource_id_field = 'app_id'
token_prefix = 'app-'
class AppApiKeyResource(BaseApiKeyResource):
def after_request(self, resp):
resp.headers['Access-Control-Allow-Origin'] = '*'
resp.headers['Access-Control-Allow-Credentials'] = 'true'
return resp
resource_type = 'app'
resource_model = App
resource_id_field = 'app_id'
class DatasetApiKeyListResource(BaseApiKeyListResource):
def after_request(self, resp):
resp.headers['Access-Control-Allow-Origin'] = '*'
resp.headers['Access-Control-Allow-Credentials'] = 'true'
return resp
resource_type = 'dataset'
resource_model = Dataset
resource_id_field = 'dataset_id'
token_prefix = 'ds-'
class DatasetApiKeyResource(BaseApiKeyResource):
def after_request(self, resp):
resp.headers['Access-Control-Allow-Origin'] = '*'
resp.headers['Access-Control-Allow-Credentials'] = 'true'
return resp
resource_type = 'dataset'
resource_model = Dataset
resource_id_field = 'dataset_id'
api.add_resource(AppApiKeyListResource, '/apps/<uuid:resource_id>/api-keys')
api.add_resource(AppApiKeyResource,
'/apps/<uuid:resource_id>/api-keys/<uuid:api_key_id>')
api.add_resource(DatasetApiKeyListResource,
'/datasets/<uuid:resource_id>/api-keys')
api.add_resource(DatasetApiKeyResource,
'/datasets/<uuid:resource_id>/api-keys/<uuid:api_key_id>')
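
A hedged usage sketch for the API-key routes registered above, using the requests library. The base URL, session cookie, and UUID are placeholders rather than values from this commit, and the console endpoints require a logged-in flask_login session:

import requests

BASE = "http://localhost:5001/console/api"  # assumed host/port, not from this commit
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

app_id = "<app-uuid>"  # placeholder
# Create a key; the server enforces max_keys = 10 with a 400 'max_keys_exceeded'
created = session.post(f"{BASE}/apps/{app_id}/api-keys").json()
# List keys; app tokens carry the 'app-' prefix set on AppApiKeyListResource
keys = session.get(f"{BASE}/apps/{app_id}/api-keys").json()["data"]
# Delete one key (admin/owner role required, otherwise 403 Forbidden)
session.delete(f"{BASE}/apps/{app_id}/api-keys/{created['id']}")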

View File

@ -0,0 +1,22 @@
from flask_login import current_user
from werkzeug.exceptions import NotFound
from controllers.console.app.error import AppUnavailableError
from extensions.ext_database import db
from models.model import App
def _get_app(app_id, mode=None):
app = db.session.query(App).filter(
App.id == app_id,
App.tenant_id == current_user.current_tenant_id,
App.status == 'normal'
).first()
if not app:
raise NotFound("App not found")
if mode and app.mode != mode:
raise AppUnavailableError()
return app

View File

@ -0,0 +1,518 @@
# -*- coding:utf-8 -*-
import json
from datetime import datetime
import flask
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal_with, abort, inputs
from werkzeug.exceptions import Unauthorized, Forbidden
from constants.model_template import model_templates, demo_model_templates
from controllers.console import api
from controllers.console.app.error import AppNotFoundError, ProviderNotInitializeError, ProviderQuotaExceededError, \
CompletionRequestError, ProviderModelCurrentlyNotSupportError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.generator.llm_generator import LLMGenerator
from core.llm.error import ProviderTokenNotInitError, QuotaExceededError, LLMBadRequestError, LLMAPIConnectionError, \
LLMAPIUnavailableError, LLMRateLimitError, LLMAuthorizationError, ModelCurrentlyNotSupportError
from events.app_event import app_was_created, app_was_deleted
from libs.helper import TimestampField
from extensions.ext_database import db
from models.model import App, AppModelConfig, Site, InstalledApp
from services.account_service import TenantService
from services.app_model_config_service import AppModelConfigService
model_config_fields = {
'opening_statement': fields.String,
'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
'more_like_this': fields.Raw(attribute='more_like_this_dict'),
'model': fields.Raw(attribute='model_dict'),
'user_input_form': fields.Raw(attribute='user_input_form_list'),
'pre_prompt': fields.String,
'agent_mode': fields.Raw(attribute='agent_mode_dict'),
}
app_detail_fields = {
'id': fields.String,
'name': fields.String,
'mode': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'enable_site': fields.Boolean,
'enable_api': fields.Boolean,
'api_rpm': fields.Integer,
'api_rph': fields.Integer,
'is_demo': fields.Boolean,
'model_config': fields.Nested(model_config_fields, attribute='app_model_config'),
'created_at': TimestampField
}
def _get_app(app_id, tenant_id):
app = db.session.query(App).filter(App.id == app_id, App.tenant_id == tenant_id).first()
if not app:
raise AppNotFoundError
return app
class AppListApi(Resource):
prompt_config_fields = {
'prompt_template': fields.String,
}
model_config_partial_fields = {
'model': fields.Raw(attribute='model_dict'),
'pre_prompt': fields.String,
}
app_partial_fields = {
'id': fields.String,
'name': fields.String,
'mode': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'enable_site': fields.Boolean,
'enable_api': fields.Boolean,
'is_demo': fields.Boolean,
'model_config': fields.Nested(model_config_partial_fields, attribute='app_model_config'),
'created_at': TimestampField
}
app_pagination_fields = {
'page': fields.Integer,
'limit': fields.Integer(attribute='per_page'),
'total': fields.Integer,
'has_more': fields.Boolean(attribute='has_next'),
'data': fields.List(fields.Nested(app_partial_fields), attribute='items')
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_pagination_fields)
def get(self):
"""Get app list"""
parser = reqparse.RequestParser()
parser.add_argument('page', type=inputs.int_range(1, 99999), required=False, default=1, location='args')
parser.add_argument('limit', type=inputs.int_range(1, 100), required=False, default=20, location='args')
args = parser.parse_args()
app_models = db.paginate(
db.select(App).where(App.tenant_id == current_user.current_tenant_id).order_by(App.created_at.desc()),
page=args['page'],
per_page=args['limit'],
error_out=False)
return app_models
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self):
"""Create app"""
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, location='json')
parser.add_argument('mode', type=str, choices=['completion', 'chat'], location='json')
parser.add_argument('icon', type=str, location='json')
parser.add_argument('icon_background', type=str, location='json')
parser.add_argument('model_config', type=dict, location='json')
args = parser.parse_args()
# The role of the current user in the tenant-account (ta) join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
if args['model_config'] is not None:
# validate config
model_configuration = AppModelConfigService.validate_configuration(
account=current_user,
config=args['model_config'],
mode=args['mode']
)
app = App(
enable_site=True,
enable_api=True,
is_demo=False,
api_rpm=0,
api_rph=0,
status='normal'
)
app_model_config = AppModelConfig(
provider="",
model_id="",
configs={},
opening_statement=model_configuration['opening_statement'],
suggested_questions=json.dumps(model_configuration['suggested_questions']),
suggested_questions_after_answer=json.dumps(model_configuration['suggested_questions_after_answer']),
more_like_this=json.dumps(model_configuration['more_like_this']),
model=json.dumps(model_configuration['model']),
user_input_form=json.dumps(model_configuration['user_input_form']),
pre_prompt=model_configuration['pre_prompt'],
agent_mode=json.dumps(model_configuration['agent_mode']),
)
else:
if 'mode' not in args or args['mode'] is None:
abort(400, message="mode is required")
model_config_template = model_templates[args['mode'] + '_default']
app = App(**model_config_template['app'])
app_model_config = AppModelConfig(**model_config_template['model_config'])
app.name = args['name']
app.mode = args['mode']
app.icon = args['icon']
app.icon_background = args['icon_background']
app.tenant_id = current_user.current_tenant_id
db.session.add(app)
db.session.flush()
app_model_config.app_id = app.id
db.session.add(app_model_config)
db.session.flush()
app.app_model_config_id = app_model_config.id
account = current_user
site = Site(
app_id=app.id,
title=app.name,
default_language=account.interface_language,
customize_token_strategy='not_allow',
code=Site.generate_code(16)
)
db.session.add(site)
db.session.commit()
app_was_created.send(app)
return app, 201
class AppTemplateApi(Resource):
template_fields = {
'name': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'description': fields.String,
'mode': fields.String,
'model_config': fields.Nested(model_config_fields),
}
template_list_fields = {
'data': fields.List(fields.Nested(template_fields)),
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(template_list_fields)
def get(self):
"""Get app demo templates"""
account = current_user
interface_language = account.interface_language
return {'data': demo_model_templates.get(interface_language)}
class AppApi(Resource):
site_fields = {
'access_token': fields.String(attribute='code'),
'code': fields.String,
'title': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'description': fields.String,
'default_language': fields.String,
'customize_domain': fields.String,
'copyright': fields.String,
'privacy_policy': fields.String,
'customize_token_strategy': fields.String,
'prompt_public': fields.Boolean,
'app_base_url': fields.String,
}
app_detail_fields_with_site = {
'id': fields.String,
'name': fields.String,
'mode': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'enable_site': fields.Boolean,
'enable_api': fields.Boolean,
'api_rpm': fields.Integer,
'api_rph': fields.Integer,
'is_demo': fields.Boolean,
'model_config': fields.Nested(model_config_fields, attribute='app_model_config'),
'site': fields.Nested(site_fields),
'api_base_url': fields.String,
'created_at': TimestampField
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields_with_site)
def get(self, app_id):
"""Get app detail"""
app_id = str(app_id)
app = _get_app(app_id, current_user.current_tenant_id)
return app
@setup_required
@login_required
@account_initialization_required
def delete(self, app_id):
"""Delete app"""
app_id = str(app_id)
app = _get_app(app_id, current_user.current_tenant_id)
db.session.delete(app)
db.session.commit()
# todo delete related data??
# model_config, site, api_token, conversation, message, message_feedback, message_annotation
app_was_deleted.send(app)
return {'result': 'success'}, 204
class AppNameApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
# The role of the current user in the tenant-account (ta) join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, location='json')
args = parser.parse_args()
app = db.get_or_404(App, str(app_id))
if app.tenant_id != flask.session.get('tenant_id'):
raise Unauthorized()
app.name = args.get('name')
app.updated_at = datetime.utcnow()
db.session.commit()
return app
class AppIconApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
# The role of the current user in the tenant-account (ta) join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
parser = reqparse.RequestParser()
parser.add_argument('icon', type=str, location='json')
parser.add_argument('icon_background', type=str, location='json')
args = parser.parse_args()
app = db.get_or_404(App, str(app_id))
if app.tenant_id != flask.session.get('tenant_id'):
raise Unauthorized()
app.icon = args.get('icon')
app.icon_background = args.get('icon_background')
app.updated_at = datetime.utcnow()
db.session.commit()
return app
class AppSiteStatus(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
parser = reqparse.RequestParser()
parser.add_argument('enable_site', type=bool, required=True, location='json')
args = parser.parse_args()
app_id = str(app_id)
app = db.session.query(App).filter(App.id == app_id, App.tenant_id == current_user.current_tenant_id).first()
if not app:
raise AppNotFoundError
if args.get('enable_site') == app.enable_site:
return app
app.enable_site = args.get('enable_site')
app.updated_at = datetime.utcnow()
db.session.commit()
return app
class AppApiStatus(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
parser = reqparse.RequestParser()
parser.add_argument('enable_api', type=bool, required=True, location='json')
args = parser.parse_args()
app_id = str(app_id)
app = _get_app(app_id, current_user.current_tenant_id)
if args.get('enable_api') == app.enable_api:
return app
app.enable_api = args.get('enable_api')
app.updated_at = datetime.utcnow()
db.session.commit()
return app
class AppRateLimit(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
parser = reqparse.RequestParser()
parser.add_argument('api_rpm', type=inputs.natural, required=False, location='json')
parser.add_argument('api_rph', type=inputs.natural, required=False, location='json')
args = parser.parse_args()
app_id = str(app_id)
app = _get_app(app_id, current_user.current_tenant_id)
if args.get('api_rpm'):
app.api_rpm = args.get('api_rpm')
if args.get('api_rph'):
app.api_rph = args.get('api_rph')
app.updated_at = datetime.utcnow()
db.session.commit()
return app
class AppCopy(Resource):
@staticmethod
def create_app_copy(app):
copy_app = App(
name=app.name + ' copy',
icon=app.icon,
icon_background=app.icon_background,
tenant_id=app.tenant_id,
mode=app.mode,
app_model_config_id=app.app_model_config_id,
enable_site=app.enable_site,
enable_api=app.enable_api,
api_rpm=app.api_rpm,
api_rph=app.api_rph
)
return copy_app
@staticmethod
def create_app_model_config_copy(app_config, copy_app_id):
copy_app_model_config = AppModelConfig(
app_id=copy_app_id,
provider=app_config.provider,
model_id=app_config.model_id,
configs=app_config.configs,
opening_statement=app_config.opening_statement,
suggested_questions=app_config.suggested_questions,
suggested_questions_after_answer=app_config.suggested_questions_after_answer,
more_like_this=app_config.more_like_this,
model=app_config.model,
user_input_form=app_config.user_input_form,
pre_prompt=app_config.pre_prompt,
agent_mode=app_config.agent_mode
)
return copy_app_model_config
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_detail_fields)
def post(self, app_id):
app_id = str(app_id)
app = _get_app(app_id, current_user.current_tenant_id)
copy_app = self.create_app_copy(app)
db.session.add(copy_app)
app_config = db.session.query(AppModelConfig). \
filter(AppModelConfig.app_id == app_id). \
one_or_none()
if app_config:
copy_app_model_config = self.create_app_model_config_copy(app_config, copy_app.id)
db.session.add(copy_app_model_config)
db.session.commit()
copy_app.app_model_config_id = copy_app_model_config.id
db.session.commit()
return copy_app, 201
class AppExport(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
# todo
pass
class IntroductionGenerateApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('prompt_template', type=str, required=True, location='json')
args = parser.parse_args()
account = current_user
try:
answer = LLMGenerator.generate_introduction(
account.current_tenant_id,
args['prompt_template']
)
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
return {'introduction': answer}
api.add_resource(AppListApi, '/apps')
api.add_resource(AppTemplateApi, '/app-templates')
api.add_resource(AppApi, '/apps/<uuid:app_id>')
api.add_resource(AppCopy, '/apps/<uuid:app_id>/copy')
api.add_resource(AppNameApi, '/apps/<uuid:app_id>/name')
api.add_resource(AppSiteStatus, '/apps/<uuid:app_id>/site-enable')
api.add_resource(AppApiStatus, '/apps/<uuid:app_id>/api-enable')
api.add_resource(AppRateLimit, '/apps/<uuid:app_id>/rate-limit')
api.add_resource(IntroductionGenerateApi, '/introduction-generate')
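
A hedged sketch of creating an app through the routes above. Omitting model_config makes the server fall back to the mode's default template from model_templates; host and cookie are assumptions:

import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

resp = session.post(f"{BASE}/apps", json={
    "name": "My first app",
    "mode": "chat",            # 'completion' or 'chat'
    "icon": "emoji",           # illustrative values
    "icon_background": "#FFEAD5",
})
assert resp.status_code == 201   # app plus its default Site record created
print(resp.json()["id"])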

View File

@ -0,0 +1,206 @@
# -*- coding:utf-8 -*-
import json
import logging
from typing import Generator, Union
import flask_login
from flask import Response, stream_with_context
from flask_login import login_required
from werkzeug.exceptions import InternalServerError, NotFound
import services
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.app.error import ConversationCompletedError, AppUnavailableError, \
ProviderNotInitializeError, CompletionRequestError, ProviderQuotaExceededError, \
ProviderModelCurrentlyNotSupportError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.conversation_message_task import PubHandler
from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value
from flask_restful import Resource, reqparse
from services.completion_service import CompletionService
# define completion message api for user
class CompletionMessageApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
app_id = str(app_id)
# get app info
app_model = _get_app(app_id, 'completion')
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, location='json')
parser.add_argument('model_config', type=dict, required=True, location='json')
args = parser.parse_args()
account = flask_login.current_user
try:
response = CompletionService.completion(
app_model=app_model,
user=account,
args=args,
from_source='console',
streaming=True,
is_model_config_override=True
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
class CompletionMessageStopApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id, task_id):
app_id = str(app_id)
# get app info
_get_app(app_id, 'completion')
account = flask_login.current_user
PubHandler.stop(account, task_id)
return {'result': 'success'}, 200
class ChatMessageApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
app_id = str(app_id)
# get app info
app_model = _get_app(app_id, 'chat')
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, required=True, location='json')
parser.add_argument('model_config', type=dict, required=True, location='json')
parser.add_argument('conversation_id', type=uuid_value, location='json')
args = parser.parse_args()
account = flask_login.current_user
try:
response = CompletionService.completion(
app_model=app_model,
user=account,
args=args,
from_source='console',
streaming=True,
is_model_config_override=True
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
def compact_response(response: Union[dict, Generator]) -> Response:
if isinstance(response, dict):
return Response(response=json.dumps(response), status=200, mimetype='application/json')
else:
def generate() -> Generator:
try:
for chunk in response:
yield chunk
except services.errors.conversation.ConversationNotExistsError:
yield "data: " + json.dumps(api.handle_error(NotFound("Conversation Not Exists.")).get_json()) + "\n\n"
except services.errors.conversation.ConversationCompletedError:
yield "data: " + json.dumps(api.handle_error(ConversationCompletedError()).get_json()) + "\n\n"
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
yield "data: " + json.dumps(api.handle_error(AppUnavailableError()).get_json()) + "\n\n"
except ProviderTokenNotInitError:
yield "data: " + json.dumps(api.handle_error(ProviderNotInitializeError()).get_json()) + "\n\n"
except QuotaExceededError:
yield "data: " + json.dumps(api.handle_error(ProviderQuotaExceededError()).get_json()) + "\n\n"
except ModelCurrentlyNotSupportError:
yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
except ValueError as e:
yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
except Exception:
logging.exception("internal server error.")
yield "data: " + json.dumps(api.handle_error(InternalServerError()).get_json()) + "\n\n"
return Response(stream_with_context(generate()), status=200,
mimetype='text/event-stream')
class ChatMessageStopApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id, task_id):
app_id = str(app_id)
# get app info
_get_app(app_id, 'chat')
account = flask_login.current_user
PubHandler.stop(account, task_id)
return {'result': 'success'}, 200
api.add_resource(CompletionMessageApi, '/apps/<uuid:app_id>/completion-messages')
api.add_resource(CompletionMessageStopApi, '/apps/<uuid:app_id>/completion-messages/<string:task_id>/stop')
api.add_resource(ChatMessageApi, '/apps/<uuid:app_id>/chat-messages')
api.add_resource(ChatMessageStopApi, '/apps/<uuid:app_id>/chat-messages/<string:task_id>/stop')
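
A hedged sketch of consuming the text/event-stream responses that compact_response() produces above. Each SSE line, including mid-stream errors, arrives as 'data: {...}'; the request body here is illustrative, and the host, cookie, and app UUID are placeholders:

import json
import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

body = {"inputs": {}, "query": "Hello", "model_config": {}}  # illustrative; a
# real model_config must pass AppModelConfigService validation
with session.post(f"{BASE}/apps/<app-uuid>/chat-messages", json=body, stream=True) as resp:
    for raw in resp.iter_lines(decode_unicode=True):
        if raw and raw.startswith("data: "):
            print(json.loads(raw[len("data: "):]))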

View File

@ -0,0 +1,384 @@
from datetime import datetime
import pytz
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal_with
from flask_restful.inputs import int_range
from sqlalchemy import or_, func
from sqlalchemy.orm import joinedload
from werkzeug.exceptions import NotFound
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.helper import TimestampField, datetime_string, uuid_value
from extensions.ext_database import db
from models.model import Message, MessageAnnotation, Conversation
account_fields = {
'id': fields.String,
'name': fields.String,
'email': fields.String
}
feedback_fields = {
'rating': fields.String,
'content': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account': fields.Nested(account_fields, allow_null=True),
}
annotation_fields = {
'content': fields.String,
'account': fields.Nested(account_fields, allow_null=True),
'created_at': TimestampField
}
message_detail_fields = {
'id': fields.String,
'conversation_id': fields.String,
'inputs': fields.Raw,
'query': fields.String,
'message': fields.Raw,
'message_tokens': fields.Integer,
'answer': fields.String,
'answer_tokens': fields.Integer,
'provider_response_latency': fields.Integer,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'feedbacks': fields.List(fields.Nested(feedback_fields)),
'annotation': fields.Nested(annotation_fields, allow_null=True),
'created_at': TimestampField
}
feedback_stat_fields = {
'like': fields.Integer,
'dislike': fields.Integer
}
model_config_fields = {
'opening_statement': fields.String,
'suggested_questions': fields.Raw,
'model': fields.Raw,
'user_input_form': fields.Raw,
'pre_prompt': fields.String,
'agent_mode': fields.Raw,
}
class CompletionConversationApi(Resource):
class MessageTextField(fields.Raw):
def format(self, value):
return value[0]['text'] if value else ''
simple_configs_fields = {
'prompt_template': fields.String,
}
simple_model_config_fields = {
'model': fields.Raw(attribute='model_dict'),
'pre_prompt': fields.String,
}
simple_message_detail_fields = {
'inputs': fields.Raw,
'query': fields.String,
'message': MessageTextField,
'answer': fields.String,
}
conversation_fields = {
'id': fields.String,
'status': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'read_at': TimestampField,
'created_at': TimestampField,
'annotation': fields.Nested(annotation_fields, allow_null=True),
'model_config': fields.Nested(simple_model_config_fields),
'user_feedback_stats': fields.Nested(feedback_stat_fields),
'admin_feedback_stats': fields.Nested(feedback_stat_fields),
'message': fields.Nested(simple_message_detail_fields, attribute='first_message')
}
conversation_pagination_fields = {
'page': fields.Integer,
'limit': fields.Integer(attribute='per_page'),
'total': fields.Integer,
'has_more': fields.Boolean(attribute='has_next'),
'data': fields.List(fields.Nested(conversation_fields), attribute='items')
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(conversation_pagination_fields)
def get(self, app_id):
app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument('keyword', type=str, location='args')
parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('annotation_status', type=str,
choices=['annotated', 'not_annotated', 'all'], default='all', location='args')
parser.add_argument('page', type=int_range(1, 99999), default=1, location='args')
parser.add_argument('limit', type=int_range(1, 100), default=20, location='args')
args = parser.parse_args()
# get app info
app = _get_app(app_id, 'completion')
query = db.select(Conversation).where(Conversation.app_id == app.id, Conversation.mode == 'completion')
if args['keyword']:
query = query.join(
Message, Message.conversation_id == Conversation.id
).filter(
or_(
Message.query.ilike('%{}%'.format(args['keyword'])),
Message.answer.ilike('%{}%'.format(args['keyword']))
)
)
account = current_user
timezone = pytz.timezone(account.timezone)
utc_timezone = pytz.utc
if args['start']:
start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
start_datetime = start_datetime.replace(second=0)
start_datetime_timezone = timezone.localize(start_datetime)
start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
query = query.where(Conversation.created_at >= start_datetime_utc)
if args['end']:
end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
end_datetime = end_datetime.replace(second=0)
end_datetime_timezone = timezone.localize(end_datetime)
end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
query = query.where(Conversation.created_at < end_datetime_utc)
if args['annotation_status'] == "annotated":
query = query.options(joinedload(Conversation.message_annotations)).join(
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
elif args['annotation_status'] == "not_annotated":
query = query.outerjoin(
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
).group_by(Conversation.id).having(func.count(MessageAnnotation.id) == 0)
query = query.order_by(Conversation.created_at.desc())
conversations = db.paginate(
query,
page=args['page'],
per_page=args['limit'],
error_out=False
)
return conversations
class CompletionConversationDetailApi(Resource):
conversation_detail_fields = {
'id': fields.String,
'status': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'created_at': TimestampField,
'model_config': fields.Nested(model_config_fields),
'message': fields.Nested(message_detail_fields, attribute='first_message'),
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(conversation_detail_fields)
def get(self, app_id, conversation_id):
app_id = str(app_id)
conversation_id = str(conversation_id)
return _get_conversation(app_id, conversation_id, 'completion')
class ChatConversationApi(Resource):
simple_configs_fields = {
'prompt_template': fields.String,
}
simple_model_config_fields = {
'model': fields.Raw(attribute='model_dict'),
'pre_prompt': fields.String,
}
conversation_fields = {
'id': fields.String,
'status': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'summary': fields.String(attribute='summary_or_query'),
'read_at': TimestampField,
'created_at': TimestampField,
'annotated': fields.Boolean,
'model_config': fields.Nested(simple_model_config_fields),
'message_count': fields.Integer,
'user_feedback_stats': fields.Nested(feedback_stat_fields),
'admin_feedback_stats': fields.Nested(feedback_stat_fields)
}
conversation_pagination_fields = {
'page': fields.Integer,
'limit': fields.Integer(attribute='per_page'),
'total': fields.Integer,
'has_more': fields.Boolean(attribute='has_next'),
'data': fields.List(fields.Nested(conversation_fields), attribute='items')
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(conversation_pagination_fields)
def get(self, app_id):
app_id = str(app_id)
parser = reqparse.RequestParser()
parser.add_argument('keyword', type=str, location='args')
parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('annotation_status', type=str,
choices=['annotated', 'not_annotated', 'all'], default='all', location='args')
parser.add_argument('message_count_gte', type=int_range(1, 99999), required=False, location='args')
parser.add_argument('page', type=int_range(1, 99999), required=False, default=1, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
args = parser.parse_args()
# get app info
app = _get_app(app_id, 'chat')
query = db.select(Conversation).where(Conversation.app_id == app.id, Conversation.mode == 'chat')
if args['keyword']:
query = query.join(
Message, Message.conversation_id == Conversation.id
).filter(
or_(
Message.query.ilike('%{}%'.format(args['keyword'])),
Message.answer.ilike('%{}%'.format(args['keyword'])),
Conversation.name.ilike('%{}%'.format(args['keyword'])),
Conversation.introduction.ilike('%{}%'.format(args['keyword'])),
),
)
account = current_user
timezone = pytz.timezone(account.timezone)
utc_timezone = pytz.utc
if args['start']:
start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
start_datetime = start_datetime.replace(second=0)
start_datetime_timezone = timezone.localize(start_datetime)
start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
query = query.where(Conversation.created_at >= start_datetime_utc)
if args['end']:
end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
end_datetime = end_datetime.replace(second=0)
end_datetime_timezone = timezone.localize(end_datetime)
end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
query = query.where(Conversation.created_at < end_datetime_utc)
if args['annotation_status'] == "annotated":
query = query.options(joinedload(Conversation.message_annotations)).join(
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
elif args['annotation_status'] == "not_annotated":
query = query.outerjoin(
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
).group_by(Conversation.id).having(func.count(MessageAnnotation.id) == 0)
if args['message_count_gte'] and args['message_count_gte'] >= 1:
query = (
query.options(joinedload(Conversation.messages))
.join(Message, Message.conversation_id == Conversation.id)
.group_by(Conversation.id)
.having(func.count(Message.id) >= args['message_count_gte'])
)
query = query.order_by(Conversation.created_at.desc())
conversations = db.paginate(
query,
page=args['page'],
per_page=args['limit'],
error_out=False
)
return conversations
class ChatConversationDetailApi(Resource):
conversation_detail_fields = {
'id': fields.String,
'status': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'created_at': TimestampField,
'annotated': fields.Boolean,
'model_config': fields.Nested(model_config_fields),
'message_count': fields.Integer,
'user_feedback_stats': fields.Nested(feedback_stat_fields),
'admin_feedback_stats': fields.Nested(feedback_stat_fields)
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(conversation_detail_fields)
def get(self, app_id, conversation_id):
app_id = str(app_id)
conversation_id = str(conversation_id)
return _get_conversation(app_id, conversation_id, 'chat')
api.add_resource(CompletionConversationApi, '/apps/<uuid:app_id>/completion-conversations')
api.add_resource(CompletionConversationDetailApi, '/apps/<uuid:app_id>/completion-conversations/<uuid:conversation_id>')
api.add_resource(ChatConversationApi, '/apps/<uuid:app_id>/chat-conversations')
api.add_resource(ChatConversationDetailApi, '/apps/<uuid:app_id>/chat-conversations/<uuid:conversation_id>')
def _get_conversation(app_id, conversation_id, mode):
# get app info
app = _get_app(app_id, mode)
conversation = db.session.query(Conversation) \
.filter(Conversation.id == conversation_id, Conversation.app_id == app.id).first()
if not conversation:
raise NotFound("Conversation Not Exists.")
if not conversation.read_at:
conversation.read_at = datetime.utcnow()
conversation.read_account_id = current_user.id
db.session.commit()
return conversation
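
A hedged sketch of the conversation-list filters above. start/end are parsed with '%Y-%m-%d %H:%M', localized to the account's timezone, and compared in UTC server-side; host, cookie, and UUID are placeholders:

import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

page = session.get(f"{BASE}/apps/<app-uuid>/chat-conversations", params={
    "keyword": "refund",
    "start": "2023-05-01 00:00",   # '%Y-%m-%d %H:%M', in the account's timezone
    "end": "2023-05-15 00:00",
    "annotation_status": "not_annotated",
    "message_count_gte": 2,
    "page": 1,
    "limit": 20,
}).json()
print(page["total"], len(page["data"]))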

View File

@ -0,0 +1,49 @@
from libs.exception import BaseHTTPException
class AppNotFoundError(BaseHTTPException):
error_code = 'app_not_found'
description = "App not found."
code = 404
class ProviderNotInitializeError(BaseHTTPException):
error_code = 'provider_not_initialize'
description = "Provider Token not initialize."
code = 400
class ProviderQuotaExceededError(BaseHTTPException):
error_code = 'provider_quota_exceeded'
description = "Provider quota exceeded."
code = 400
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
error_code = 'model_currently_not_support'
description = "GPT-4 currently not support."
code = 400
class ConversationCompletedError(BaseHTTPException):
error_code = 'conversation_completed'
description = "Conversation was completed."
code = 400
class AppUnavailableError(BaseHTTPException):
error_code = 'app_unavailable'
description = "App unavailable."
code = 400
class CompletionRequestError(BaseHTTPException):
error_code = 'completion_request_error'
description = "Completion request failed."
code = 400
class AppMoreLikeThisDisabledError(BaseHTTPException):
error_code = 'app_more_like_this_disabled'
description = "More like this disabled."
code = 403
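
libs.exception is not shown in this excerpt; below is a minimal sketch of the pattern the classes above appear to assume -- an HTTPException subclass that serializes its error_code/description/code triple as JSON. An assumption, not the repository's actual implementation.

import json
from werkzeug.exceptions import HTTPException

class BaseHTTPExceptionSketch(HTTPException):
    error_code = 'unknown'
    description = 'Unknown error.'
    code = 500

    def get_body(self, environ=None, scope=None):
        # Serialize the triple the subclasses above override
        return json.dumps({
            'code': self.error_code,
            'message': self.description,
            'status': self.code,
        })

    def get_headers(self, environ=None, scope=None):
        return [('Content-Type', 'application/json')]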

View File

@ -0,0 +1,209 @@
# -*- coding:utf-8 -*-
from datetime import datetime
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal_with, abort, inputs
from sqlalchemy import and_
from controllers.console import api
from extensions.ext_database import db
from models.model import App, InstalledApp, RecommendedApp
from services.account_service import TenantService
app_fields = {
'id': fields.String,
'name': fields.String,
'mode': fields.String,
'icon': fields.String,
'icon_background': fields.String
}
installed_app_fields = {
'id': fields.String,
'app': fields.Nested(app_fields, attribute='app'),
'app_owner_tenant_id': fields.String,
'is_pinned': fields.Boolean,
'last_used_at': fields.DateTime,
'editable': fields.Boolean
}
installed_app_list_fields = {
'installed_apps': fields.List(fields.Nested(installed_app_fields))
}
recommended_app_fields = {
'app': fields.Nested(app_fields, attribute='app'),
'app_id': fields.String,
'description': fields.String(attribute='description'),
'copyright': fields.String,
'privacy_policy': fields.String,
'category': fields.String,
'position': fields.Integer,
'is_listed': fields.Boolean,
'install_count': fields.Integer,
'installed': fields.Boolean,
'editable': fields.Boolean
}
recommended_app_list_fields = {
'recommended_apps': fields.List(fields.Nested(recommended_app_fields)),
'categories': fields.List(fields.String)
}
class InstalledAppsListResource(Resource):
@login_required
@marshal_with(installed_app_list_fields)
def get(self):
current_tenant_id = current_user.current_tenant_id  # scope to the caller's tenant rather than an arbitrary first tenant
installed_apps = db.session.query(InstalledApp).filter(
InstalledApp.tenant_id == current_tenant_id
).all()
current_user.role = TenantService.get_user_role(current_user, current_user.current_tenant)
# Attach the computed permission flag to each model instance. Spreading a
# SQLAlchemy object with ** would raise a TypeError, and the attribute-based
# sort below would then fail on plain dicts.
for installed_app in installed_apps:
    installed_app.editable = current_user.role in ['owner', 'admin']
installed_apps.sort(key=lambda app: (-app.is_pinned, app.last_used_at))
return {'installed_apps': installed_apps}
@login_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('app_id', type=str, required=True, help='Invalid app_id')
args = parser.parse_args()
current_tenant_id = current_user.current_tenant_id
app = App.query.get(args['app_id'])
if app is None:
abort(404, message='App not found')
recommended_app = RecommendedApp.query.filter(RecommendedApp.app_id == args['app_id']).first()
if recommended_app is None:
abort(404, message='App not found')
if not app.is_public:
abort(403, message="You can't install a non-public app")
installed_app = InstalledApp.query.filter(and_(
InstalledApp.app_id == args['app_id'],
InstalledApp.tenant_id == current_tenant_id
)).first()
if installed_app is None:
# todo: position
recommended_app.install_count += 1
new_installed_app = InstalledApp(
app_id=args['app_id'],
tenant_id=current_tenant_id,
is_pinned=False,
last_used_at=datetime.utcnow()
)
db.session.add(new_installed_app)
db.session.commit()
return {'message': 'App installed successfully'}
class InstalledAppResource(Resource):
@login_required
def delete(self, installed_app_id):
installed_app = InstalledApp.query.filter(and_(
InstalledApp.id == str(installed_app_id),
InstalledApp.tenant_id == current_user.current_tenant_id
)).first()
if installed_app is None:
abort(404, message='App not found')
if installed_app.app_owner_tenant_id == current_user.current_tenant_id:
abort(400, message="You can't uninstall an app owned by the current tenant")
db.session.delete(installed_app)
db.session.commit()
return {'result': 'success', 'message': 'App uninstalled successfully'}
@login_required
def patch(self, installed_app_id):
parser = reqparse.RequestParser()
parser.add_argument('is_pinned', type=inputs.boolean)
args = parser.parse_args()
current_tenant_id = current_user.current_tenant_id
installed_app = InstalledApp.query.filter(and_(
InstalledApp.id == str(installed_app_id),
InstalledApp.tenant_id == current_tenant_id
)).first()
if installed_app is None:
abort(404, message='Installed app not found')
commit_args = False
if 'is_pinned' in args:
installed_app.is_pinned = args['is_pinned']
commit_args = True
if commit_args:
db.session.commit()
return {'result': 'success', 'message': 'App info updated successfully'}
class RecommendedAppsResource(Resource):
@login_required
@marshal_with(recommended_app_list_fields)
def get(self):
recommended_apps = db.session.query(RecommendedApp).filter(
RecommendedApp.is_listed == True
).all()
categories = set()
current_user.role = TenantService.get_user_role(current_user, current_user.current_tenant)
recommended_apps_result = []
for recommended_app in recommended_apps:
installed = db.session.query(InstalledApp).filter(
and_(
InstalledApp.app_id == recommended_app.app_id,
InstalledApp.tenant_id == current_user.current_tenant_id
)
).first() is not None
language_prefix = current_user.interface_language.split('-')[0]
desc = None
if recommended_app.description:
if language_prefix in recommended_app.description:
desc = recommended_app.description[language_prefix]
elif 'en' in recommended_app.description:
desc = recommended_app.description['en']
recommended_app_result = {
'id': recommended_app.id,
'app': recommended_app.app,
'app_id': recommended_app.app_id,
'description': desc,
'copyright': recommended_app.copyright,
'privacy_policy': recommended_app.privacy_policy,
'category': recommended_app.category,
'position': recommended_app.position,
'is_listed': recommended_app.is_listed,
'install_count': recommended_app.install_count,
'installed': installed,
'editable': current_user.role in ['owner', 'admin'],
}
recommended_apps_result.append(recommended_app_result)
categories.add(recommended_app.category) # add category to categories
return {'recommended_apps': recommended_apps_result, 'categories': list(categories)}
api.add_resource(InstalledAppsListResource, '/installed-apps')
api.add_resource(InstalledAppResource, '/installed-apps/<uuid:installed_app_id>')
api.add_resource(RecommendedAppsResource, '/explore/apps')
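
A hedged sketch of the explore/install flow wired above; only public, recommended apps can be installed, and re-installing is a no-op. Host and cookie are placeholders:

import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

apps = session.get(f"{BASE}/explore/apps").json()["recommended_apps"]
if apps:
    session.post(f"{BASE}/installed-apps", json={"app_id": apps[0]["app_id"]})
    installed = session.get(f"{BASE}/installed-apps").json()["installed_apps"]
    print(len(installed))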

View File

@ -0,0 +1,361 @@
import json
import logging
from typing import Union, Generator
from flask import Response, stream_with_context
from flask_login import current_user, login_required
from flask_restful import Resource, reqparse, marshal_with, fields
from flask_restful.inputs import int_range
from werkzeug.exceptions import InternalServerError, NotFound
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.app.error import CompletionRequestError, ProviderNotInitializeError, \
AppMoreLikeThisDisabledError, ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.llm.error import LLMRateLimitError, LLMBadRequestError, LLMAuthorizationError, LLMAPIConnectionError, \
ProviderTokenNotInitError, LLMAPIUnavailableError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value, TimestampField
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from extensions.ext_database import db
from models.model import MessageAnnotation, Conversation, Message, MessageFeedback
from services.completion_service import CompletionService
from services.errors.app import MoreLikeThisDisabledError
from services.errors.conversation import ConversationNotExistsError
from services.errors.message import MessageNotExistsError
from services.message_service import MessageService
class ChatMessageApi(Resource):
account_fields = {
'id': fields.String,
'name': fields.String,
'email': fields.String
}
feedback_fields = {
'rating': fields.String,
'content': fields.String,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account': fields.Nested(account_fields, allow_null=True),
}
annotation_fields = {
'content': fields.String,
'account': fields.Nested(account_fields, allow_null=True),
'created_at': TimestampField
}
message_detail_fields = {
'id': fields.String,
'conversation_id': fields.String,
'inputs': fields.Raw,
'query': fields.String,
'message': fields.Raw,
'message_tokens': fields.Integer,
'answer': fields.String,
'answer_tokens': fields.Integer,
'provider_response_latency': fields.Integer,
'from_source': fields.String,
'from_end_user_id': fields.String,
'from_account_id': fields.String,
'feedbacks': fields.List(fields.Nested(feedback_fields)),
'annotation': fields.Nested(annotation_fields, allow_null=True),
'created_at': TimestampField
}
message_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(message_detail_fields))
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(message_infinite_scroll_pagination_fields)
def get(self, app_id):
app_id = str(app_id)
# get app info
app = _get_app(app_id, 'chat')
parser = reqparse.RequestParser()
parser.add_argument('conversation_id', required=True, type=uuid_value, location='args')
parser.add_argument('first_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
args = parser.parse_args()
conversation = db.session.query(Conversation).filter(
Conversation.id == args['conversation_id'],
Conversation.app_id == app.id
).first()
if not conversation:
raise NotFound("Conversation Not Exists.")
if args['first_id']:
first_message = db.session.query(Message) \
.filter(Message.conversation_id == conversation.id, Message.id == args['first_id']).first()
if not first_message:
raise NotFound("First message not found")
history_messages = db.session.query(Message).filter(
Message.conversation_id == conversation.id,
Message.created_at < first_message.created_at,
Message.id != first_message.id
) \
.order_by(Message.created_at.desc()).limit(args['limit']).all()
else:
history_messages = db.session.query(Message).filter(Message.conversation_id == conversation.id) \
.order_by(Message.created_at.desc()).limit(args['limit']).all()
has_more = False
if len(history_messages) == args['limit']:
current_page_first_message = history_messages[-1]
rest_count = db.session.query(Message).filter(
Message.conversation_id == conversation.id,
Message.created_at < current_page_first_message.created_at,
Message.id != current_page_first_message.id
).count()
if rest_count > 0:
has_more = True
history_messages = list(reversed(history_messages))
return InfiniteScrollPagination(
data=history_messages,
limit=args['limit'],
has_more=has_more
)
class MessageFeedbackApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
app_id = str(app_id)
# get app info
app = _get_app(app_id)
parser = reqparse.RequestParser()
parser.add_argument('message_id', required=True, type=uuid_value, location='json')
parser.add_argument('rating', type=str, choices=['like', 'dislike', None], location='json')
args = parser.parse_args()
message_id = str(args['message_id'])
message = db.session.query(Message).filter(
Message.id == message_id,
Message.app_id == app.id
).first()
if not message:
raise NotFound("Message Not Exists.")
feedback = message.admin_feedback
if not args['rating'] and feedback:
db.session.delete(feedback)
elif args['rating'] and feedback:
feedback.rating = args['rating']
elif not args['rating'] and not feedback:
raise ValueError('rating cannot be None when feedback does not exist')
else:
feedback = MessageFeedback(
app_id=app.id,
conversation_id=message.conversation_id,
message_id=message.id,
rating=args['rating'],
from_source='admin',
from_account_id=current_user.id
)
db.session.add(feedback)
db.session.commit()
return {'result': 'success'}
class MessageAnnotationApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
app_id = str(app_id)
# get app info
app = _get_app(app_id)
parser = reqparse.RequestParser()
parser.add_argument('message_id', required=True, type=uuid_value, location='json')
parser.add_argument('content', type=str, location='json')
args = parser.parse_args()
message_id = str(args['message_id'])
message = db.session.query(Message).filter(
Message.id == message_id,
Message.app_id == app.id
).first()
if not message:
raise NotFound("Message Not Exists.")
annotation = message.annotation
if annotation:
annotation.content = args['content']
else:
annotation = MessageAnnotation(
app_id=app.id,
conversation_id=message.conversation_id,
message_id=message.id,
content=args['content'],
account_id=current_user.id
)
db.session.add(annotation)
db.session.commit()
return {'result': 'success'}
class MessageAnnotationCountApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id):
app_id = str(app_id)
# get app info
app = _get_app(app_id)
count = db.session.query(MessageAnnotation).filter(
MessageAnnotation.app_id == app.id
).count()
return {'count': count}
class MessageMoreLikeThisApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id, message_id):
app_id = str(app_id)
message_id = str(message_id)
parser = reqparse.RequestParser()
parser.add_argument('response_mode', type=str, required=True, choices=['blocking', 'streaming'], location='args')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
# get app info
app_model = _get_app(app_id, 'completion')
try:
response = CompletionService.generate_more_like_this(app_model, current_user, message_id, streaming)
return compact_response(response)
except MessageNotExistsError:
raise NotFound("Message Not Exists.")
except MoreLikeThisDisabledError:
raise AppMoreLikeThisDisabledError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
def compact_response(response: Union[dict, Generator]) -> Response:
if isinstance(response, dict):
return Response(response=json.dumps(response), status=200, mimetype='application/json')
else:
def generate() -> Generator:
try:
for chunk in response:
yield chunk
except MessageNotExistsError:
yield "data: " + json.dumps(api.handle_error(NotFound("Message Not Exists.")).get_json()) + "\n\n"
except MoreLikeThisDisabledError:
yield "data: " + json.dumps(api.handle_error(AppMoreLikeThisDisabledError()).get_json()) + "\n\n"
except ProviderTokenNotInitError:
yield "data: " + json.dumps(api.handle_error(ProviderNotInitializeError()).get_json()) + "\n\n"
except QuotaExceededError:
yield "data: " + json.dumps(api.handle_error(ProviderQuotaExceededError()).get_json()) + "\n\n"
except ModelCurrentlyNotSupportError:
yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
except ValueError as e:
yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
except Exception:
logging.exception("internal server error.")
yield "data: " + json.dumps(api.handle_error(InternalServerError()).get_json()) + "\n\n"
return Response(stream_with_context(generate()), status=200,
mimetype='text/event-stream')
class MessageSuggestedQuestionApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id, message_id):
app_id = str(app_id)
message_id = str(message_id)
# get app info
app_model = _get_app(app_id, 'chat')
try:
questions = MessageService.get_suggested_questions_after_answer(
app_model=app_model,
user=current_user,
message_id=message_id,
check_enabled=False
)
except MessageNotExistsError:
raise NotFound("Message not found")
except ConversationNotExistsError:
raise NotFound("Conversation not found")
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
return {'data': questions}
api.add_resource(MessageMoreLikeThisApi, '/apps/<uuid:app_id>/completion-messages/<uuid:message_id>/more-like-this')
api.add_resource(MessageSuggestedQuestionApi, '/apps/<uuid:app_id>/chat-messages/<uuid:message_id>/suggested-questions')
api.add_resource(ChatMessageApi, '/apps/<uuid:app_id>/chat-messages', endpoint='chat_messages')
api.add_resource(MessageFeedbackApi, '/apps/<uuid:app_id>/feedbacks')
api.add_resource(MessageAnnotationApi, '/apps/<uuid:app_id>/annotations')
api.add_resource(MessageAnnotationCountApi, '/apps/<uuid:app_id>/annotations/count')
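
A hedged sketch for the feedback and annotation routes above; per the branching in MessageFeedbackApi, posting rating=null deletes an existing admin feedback. Host, cookie, and UUIDs are placeholders:

import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

session.post(f"{BASE}/apps/<app-uuid>/feedbacks",
             json={"message_id": "<message-uuid>", "rating": "like"})
session.post(f"{BASE}/apps/<app-uuid>/annotations",
             json={"message_id": "<message-uuid>", "content": "Corrected answer."})
count = session.get(f"{BASE}/apps/<app-uuid>/annotations/count").json()["count"]
print(count)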

View File

@ -0,0 +1,65 @@
# -*- coding:utf-8 -*-
import json
from flask import request
from flask_restful import Resource
from flask_login import login_required, current_user
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from events.app_event import app_model_config_was_updated
from extensions.ext_database import db
from models.model import AppModelConfig
from services.app_model_config_service import AppModelConfigService
class ModelConfigResource(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, app_id):
"""Modify app model config"""
app_id = str(app_id)
app_model = _get_app(app_id)
# validate config
model_configuration = AppModelConfigService.validate_configuration(
account=current_user,
config=request.json,
mode=app_model.mode
)
new_app_model_config = AppModelConfig(
app_id=app_model.id,
provider="",
model_id="",
configs={},
opening_statement=model_configuration['opening_statement'],
suggested_questions=json.dumps(model_configuration['suggested_questions']),
suggested_questions_after_answer=json.dumps(model_configuration['suggested_questions_after_answer']),
more_like_this=json.dumps(model_configuration['more_like_this']),
model=json.dumps(model_configuration['model']),
user_input_form=json.dumps(model_configuration['user_input_form']),
pre_prompt=model_configuration['pre_prompt'],
agent_mode=json.dumps(model_configuration['agent_mode']),
)
db.session.add(new_app_model_config)
db.session.flush()
app_model.app_model_config_id = new_app_model_config.id
db.session.commit()
app_model_config_was_updated.send(
app_model,
app_model_config=new_app_model_config
)
return {'result': 'success'}
api.add_resource(ModelConfigResource, '/apps/<uuid:app_id>/model-config')
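
Note that the handler above never updates a config row in place: each POST inserts a new AppModelConfig and repoints app.app_model_config_id, so earlier versions are retained. A hedged request sketch follows; the body mirrors what AppModelConfigService.validate_configuration appears to expect, but the exact schema lives in that service and is assumed here:

import requests

BASE = "http://localhost:5001/console/api"  # assumed
session = requests.Session()
session.cookies.set("session", "<logged-in-session-cookie>")  # placeholder

session.post(f"{BASE}/apps/<app-uuid>/model-config", json={
    "opening_statement": "Hi! How can I help?",
    "suggested_questions": [],
    "suggested_questions_after_answer": {"enabled": False},
    "more_like_this": {"enabled": False},
    "model": {"provider": "openai", "name": "gpt-3.5-turbo",
              "completion_params": {"max_tokens": 300, "temperature": 0.8}},
    "user_input_form": [],
    "pre_prompt": "",
    "agent_mode": {"enabled": False, "tools": []},
})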

View File

@ -0,0 +1,114 @@
# -*- coding:utf-8 -*-
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal_with
from werkzeug.exceptions import NotFound, Forbidden
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.helper import supported_language
from extensions.ext_database import db
from models.model import Site
app_site_fields = {
'app_id': fields.String,
'access_token': fields.String(attribute='code'),
'code': fields.String,
'title': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'description': fields.String,
'default_language': fields.String,
'customize_domain': fields.String,
'copyright': fields.String,
'privacy_policy': fields.String,
'customize_token_strategy': fields.String,
'prompt_public': fields.Boolean
}
def parse_app_site_args():
parser = reqparse.RequestParser()
parser.add_argument('title', type=str, required=False, location='json')
parser.add_argument('icon', type=str, required=False, location='json')
parser.add_argument('icon_background', type=str, required=False, location='json')
parser.add_argument('description', type=str, required=False, location='json')
parser.add_argument('default_language', type=supported_language, required=False, location='json')
parser.add_argument('customize_domain', type=str, required=False, location='json')
parser.add_argument('copyright', type=str, required=False, location='json')
parser.add_argument('privacy_policy', type=str, required=False, location='json')
parser.add_argument('customize_token_strategy', type=str, choices=['must', 'allow', 'not_allow'],
required=False,
location='json')
parser.add_argument('prompt_public', type=bool, required=False, location='json')
return parser.parse_args()
class AppSite(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_site_fields)
def post(self, app_id):
args = parse_app_site_args()
app_id = str(app_id)
app_model = _get_app(app_id)
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
site = db.session.query(Site). \
filter(Site.app_id == app_model.id). \
one_or_404()
for attr_name in [
'title',
'icon',
'icon_background',
'description',
'default_language',
'customize_domain',
'copyright',
'privacy_policy',
'customize_token_strategy',
'prompt_public'
]:
value = args.get(attr_name)
if value is not None:
setattr(site, attr_name, value)
db.session.commit()
return site
class AppSiteAccessTokenReset(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(app_site_fields)
def post(self, app_id):
app_id = str(app_id)
app_model = _get_app(app_id)
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
site = db.session.query(Site).filter(Site.app_id == app_model.id).first()
if not site:
raise NotFound('Site not found.')
site.code = Site.generate_code(16)
db.session.commit()
return site
api.add_resource(AppSite, '/apps/<uuid:app_id>/site')
api.add_resource(AppSiteAccessTokenReset, '/apps/<uuid:app_id>/site/access-token-reset')
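# Usage sketch (illustrative): updating site settings, then rotating the share
# code. Only admins/owners pass the role check above; values are hypothetical.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
app_id = '11111111-1111-1111-1111-111111111111'  # hypothetical
cookies = {'session': '<console-session-cookie>'}

requests.post(f'{BASE}/apps/{app_id}/site',
              json={'title': 'Support Bot', 'default_language': 'en-US'},
              cookies=cookies)
resp = requests.post(f'{BASE}/apps/{app_id}/site/access-token-reset',
                     cookies=cookies)
print(resp.json().get('access_token'))  # the regenerated 16-char site code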

View File

@ -0,0 +1,202 @@
# -*- coding:utf-8 -*-
from datetime import datetime
import pytz
from flask import jsonify
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse
from controllers.console import api
from controllers.console.app import _get_app
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.helper import datetime_string
from extensions.ext_database import db
class DailyConversationStatistic(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id):
account = current_user
app_id = str(app_id)
app_model = _get_app(app_id)
parser = reqparse.RequestParser()
parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
args = parser.parse_args()
sql_query = '''
SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, count(distinct messages.conversation_id) AS conversation_count
FROM messages where app_id = :app_id
'''
arg_dict = {'tz': account.timezone, 'app_id': app_model.id}
timezone = pytz.timezone(account.timezone)
utc_timezone = pytz.utc
if args['start']:
start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
start_datetime = start_datetime.replace(second=0)
start_datetime_timezone = timezone.localize(start_datetime)
start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at >= :start'
arg_dict['start'] = start_datetime_utc
if args['end']:
end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
end_datetime = end_datetime.replace(second=0)
end_datetime_timezone = timezone.localize(end_datetime)
end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at < :end'
arg_dict['end'] = end_datetime_utc
sql_query += ' GROUP BY date order by date'
rs = db.session.execute(sql_query, arg_dict)
response_date = []
for i in rs:
response_date.append({
'date': str(i.date),
'conversation_count': i.conversation_count
})
return jsonify({
'data': response_date
})
class DailyTerminalsStatistic(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id):
account = current_user
app_id = str(app_id)
app_model = _get_app(app_id)
parser = reqparse.RequestParser()
parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
args = parser.parse_args()
sql_query = '''
SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date, count(distinct messages.from_end_user_id) AS terminal_count
FROM messages where app_id = :app_id
'''
arg_dict = {'tz': account.timezone, 'app_id': app_model.id}
timezone = pytz.timezone(account.timezone)
utc_timezone = pytz.utc
if args['start']:
start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
start_datetime = start_datetime.replace(second=0)
start_datetime_timezone = timezone.localize(start_datetime)
start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at >= :start'
arg_dict['start'] = start_datetime_utc
if args['end']:
end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
end_datetime = end_datetime.replace(second=0)
end_datetime_timezone = timezone.localize(end_datetime)
end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at < :end'
arg_dict['end'] = end_datetime_utc
sql_query += ' GROUP BY date order by date'
rs = db.session.execute(sql_query, arg_dict)
response_date = []
for i in rs:
response_date.append({
'date': str(i.date),
'terminal_count': i.terminal_count
})
return jsonify({
'data': response_date
})
class DailyTokenCostStatistic(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, app_id):
account = current_user
app_id = str(app_id)
app_model = _get_app(app_id)
parser = reqparse.RequestParser()
parser.add_argument('start', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
parser.add_argument('end', type=datetime_string('%Y-%m-%d %H:%M'), location='args')
args = parser.parse_args()
sql_query = '''
SELECT date(DATE_TRUNC('day', created_at AT TIME ZONE 'UTC' AT TIME ZONE :tz )) AS date,
(sum(messages.message_tokens) + sum(messages.answer_tokens)) as token_count,
sum(total_price) as total_price
FROM messages where app_id = :app_id
'''
arg_dict = {'tz': account.timezone, 'app_id': app_model.id}
timezone = pytz.timezone(account.timezone)
utc_timezone = pytz.utc
if args['start']:
start_datetime = datetime.strptime(args['start'], '%Y-%m-%d %H:%M')
start_datetime = start_datetime.replace(second=0)
start_datetime_timezone = timezone.localize(start_datetime)
start_datetime_utc = start_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at >= :start'
arg_dict['start'] = start_datetime_utc
if args['end']:
end_datetime = datetime.strptime(args['end'], '%Y-%m-%d %H:%M')
end_datetime = end_datetime.replace(second=0)
end_datetime_timezone = timezone.localize(end_datetime)
end_datetime_utc = end_datetime_timezone.astimezone(utc_timezone)
sql_query += ' and created_at < :end'
arg_dict['end'] = end_datetime_utc
sql_query += ' GROUP BY date order by date'
rs = db.session.execute(sql_query, arg_dict)
response_date = []
for i in rs:
response_date.append({
'date': str(i.date),
'token_count': i.token_count,
'total_price': i.total_price,
'currency': 'USD'
})
return jsonify({
'data': response_date
})
api.add_resource(DailyConversationStatistic, '/apps/<uuid:app_id>/statistics/daily-conversations')
api.add_resource(DailyTerminalsStatistic, '/apps/<uuid:app_id>/statistics/daily-end-users')
api.add_resource(DailyTokenCostStatistic, '/apps/<uuid:app_id>/statistics/token-costs')
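# Usage sketch (illustrative): the start/end filters are parsed with the
# '%Y-%m-%d %H:%M' format in the account's own timezone and converted to UTC
# server-side, so clients pass local wall-clock times, not UTC.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
app_id = '11111111-1111-1111-1111-111111111111'  # hypothetical

resp = requests.get(
    f'{BASE}/apps/{app_id}/statistics/daily-conversations',
    params={'start': '2023-05-01 00:00', 'end': '2023-05-15 00:00'},
    cookies={'session': '<console-session-cookie>'},
)
# expected shape: {'data': [{'date': '2023-05-01', 'conversation_count': 3}, ...]}
print(resp.json())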

View File

@ -0,0 +1,109 @@
# -*- coding:utf-8 -*-
import flask
import flask_login
from flask import request, current_app
from flask_restful import Resource, reqparse
import services
from controllers.console import api
from controllers.console.error import AccountNotLinkTenantError
from controllers.console.setup import setup_required
from libs.helper import email
from libs.password import valid_password
from services.account_service import AccountService, TenantService
class LoginApi(Resource):
"""Resource for user login."""
@setup_required
def post(self):
"""Authenticate user and login."""
parser = reqparse.RequestParser()
parser.add_argument('email', type=email, required=True, location='json')
parser.add_argument('password', type=valid_password, required=True, location='json')
parser.add_argument('remember_me', type=bool, required=False, default=False, location='json')
args = parser.parse_args()
# todo: Verify the recaptcha
try:
account = AccountService.authenticate(args['email'], args['password'])
except services.errors.account.AccountLoginError:
return {'code': 'unauthorized', 'message': 'Invalid email or password'}, 401
try:
TenantService.switch_tenant(account)
except Exception:
raise AccountNotLinkTenantError("Account is not linked to a tenant")
flask_login.login_user(account, remember=args['remember_me'])
AccountService.update_last_login(account, request)
# todo: return the user info
return {'result': 'success'}
class LogoutApi(Resource):
@setup_required
def get(self):
flask.session.pop('workspace_id', None)
flask_login.logout_user()
return {'result': 'success'}
class ResetPasswordApi(Resource):
@setup_required
def get(self):
parser = reqparse.RequestParser()
parser.add_argument('email', type=email, required=True, location='json')
args = parser.parse_args()
# import mailchimp_transactional as MailchimpTransactional
# from mailchimp_transactional.api_client import ApiClientError
account = {'email': args['email']}
# account = AccountService.get_by_email(args['email'])
# if account is None:
# raise ValueError('Email not found')
# new_password = AccountService.generate_password()
# AccountService.update_password(account, new_password)
# todo: Send email
MAILCHIMP_API_KEY = current_app.config['MAILCHIMP_TRANSACTIONAL_API_KEY']
# mailchimp = MailchimpTransactional(MAILCHIMP_API_KEY)
message = {
'from_email': 'noreply@example.com',
'to': [{'email': account['email']}],
'subject': 'Reset your Dify password',
'html': """
<p>Dear User,</p>
<p>The Dify team has generated a new password for you, details as follows:</p>
<p><strong>{new_password}</strong></p>
<p>Please log in with it and change your password as soon as possible.</p>
<p>Regards,</p>
<p>The Dify Team</p>
"""
}
# response = mailchimp.messages.send({
# 'message': message,
# # required for transactional email
# ' settings': {
# 'sandbox_mode': current_app.config['MAILCHIMP_SANDBOX_MODE'],
# },
# })
# Check if MSG was sent
# if response.status_code != 200:
# # handle error
# pass
return {'result': 'success'}
api.add_resource(LoginApi, '/login')
api.add_resource(LogoutApi, '/logout')

View File

@ -0,0 +1,126 @@
import logging
from datetime import datetime
from typing import Optional
import flask_login
import requests
from flask import request, redirect, current_app, session
from flask_restful import Resource
from libs.oauth import OAuthUserInfo, GitHubOAuth, GoogleOAuth
from extensions.ext_database import db
from models.account import Account, AccountStatus
from services.account_service import AccountService, RegisterService
from .. import api
def get_oauth_providers():
with current_app.app_context():
github_oauth = GitHubOAuth(client_id=current_app.config.get('GITHUB_CLIENT_ID'),
client_secret=current_app.config.get(
'GITHUB_CLIENT_SECRET'),
redirect_uri=current_app.config.get(
'CONSOLE_URL') + '/console/api/oauth/authorize/github')
google_oauth = GoogleOAuth(client_id=current_app.config.get('GOOGLE_CLIENT_ID'),
client_secret=current_app.config.get(
'GOOGLE_CLIENT_SECRET'),
redirect_uri=current_app.config.get(
'CONSOLE_URL') + '/console/api/oauth/authorize/google')
OAUTH_PROVIDERS = {
'github': github_oauth,
'google': google_oauth
}
return OAUTH_PROVIDERS
class OAuthLogin(Resource):
def get(self, provider: str):
OAUTH_PROVIDERS = get_oauth_providers()
with current_app.app_context():
oauth_provider = OAUTH_PROVIDERS.get(provider)
if not oauth_provider:
return {'error': 'Invalid provider'}, 400
auth_url = oauth_provider.get_authorization_url()
return redirect(auth_url)
class OAuthCallback(Resource):
def get(self, provider: str):
OAUTH_PROVIDERS = get_oauth_providers()
with current_app.app_context():
oauth_provider = OAUTH_PROVIDERS.get(provider)
if not oauth_provider:
return {'error': 'Invalid provider'}, 400
code = request.args.get('code')
try:
token = oauth_provider.get_access_token(code)
user_info = oauth_provider.get_user_info(token)
except requests.exceptions.HTTPError as e:
logging.exception(
f"An error occurred during the OAuth process with {provider}: {e.response.text}")
return {'error': 'OAuth process failed'}, 400
account = _generate_account(provider, user_info)
# Check account status
if account.status == AccountStatus.BANNED.value or account.status == AccountStatus.CLOSED.value:
return {'error': 'Account is banned or closed.'}, 403
if account.status == AccountStatus.PENDING.value:
account.status = AccountStatus.ACTIVE.value
account.initialized_at = datetime.utcnow()
db.session.commit()
# login user
session.clear()
flask_login.login_user(account, remember=True)
AccountService.update_last_login(account, request)
return redirect(f'{current_app.config.get("CONSOLE_URL")}?oauth_login=success')
def _get_account_by_openid_or_email(provider: str, user_info: OAuthUserInfo) -> Optional[Account]:
account = Account.get_by_openid(provider, user_info.id)
if not account:
account = Account.query.filter_by(email=user_info.email).first()
return account
def _generate_account(provider: str, user_info: OAuthUserInfo):
# Get account by openid or email.
account = _get_account_by_openid_or_email(provider, user_info)
if not account:
# Create account
account_name = user_info.name if user_info.name else 'Dify'
account = RegisterService.register(
email=user_info.email,
name=account_name,
password=None,
open_id=user_info.id,
provider=provider
)
# Set interface language
preferred_lang = request.accept_languages.best_match(['zh', 'en'])
if preferred_lang == 'zh':
interface_language = 'zh-Hans'
else:
interface_language = 'en-US'
account.interface_language = interface_language
db.session.commit()
# Link account
AccountService.link_account_integrate(provider, user_info.id, account)
return account
api.add_resource(OAuthLogin, '/oauth/login/<provider>')
api.add_resource(OAuthCallback, '/oauth/authorize/<provider>')

View File

@ -0,0 +1,281 @@
# -*- coding:utf-8 -*-
from flask import request
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal, marshal_with
from werkzeug.exceptions import NotFound, Forbidden
import services
from controllers.console import api
from controllers.console.datasets.error import DatasetNameDuplicateError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.indexing_runner import IndexingRunner
from libs.helper import TimestampField
from extensions.ext_database import db
from models.model import UploadFile
from services.dataset_service import DatasetService
dataset_detail_fields = {
'id': fields.String,
'name': fields.String,
'description': fields.String,
'provider': fields.String,
'permission': fields.String,
'data_source_type': fields.String,
'indexing_technique': fields.String,
'app_count': fields.Integer,
'document_count': fields.Integer,
'word_count': fields.Integer,
'created_by': fields.String,
'created_at': TimestampField,
'updated_by': fields.String,
'updated_at': TimestampField,
}
dataset_query_detail_fields = {
"id": fields.String,
"content": fields.String,
"source": fields.String,
"source_app_id": fields.String,
"created_by_role": fields.String,
"created_by": fields.String,
"created_at": TimestampField
}
def _validate_name(name):
if not name or len(name) > 40:
raise ValueError('Name must be between 1 and 40 characters.')
return name
def _validate_description_length(description):
if len(description) > 200:
raise ValueError('Description cannot exceed 200 characters.')
return description
class DatasetListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self):
page = request.args.get('page', default=1, type=int)
limit = request.args.get('limit', default=20, type=int)
ids = request.args.getlist('ids')
provider = request.args.get('provider', default="vendor")
if ids:
datasets, total = DatasetService.get_datasets_by_ids(ids, current_user.current_tenant_id)
else:
datasets, total = DatasetService.get_datasets(page, limit, provider,
current_user.current_tenant_id, current_user)
response = {
'data': marshal(datasets, dataset_detail_fields),
'has_more': len(datasets) == limit,
'limit': limit,
'total': total,
'page': page
}
return response, 200
@setup_required
@login_required
@account_initialization_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('name', nullable=False, required=True,
help='Name is required and must be between 1 and 40 characters.',
type=_validate_name)
parser.add_argument('indexing_technique', type=str, location='json',
choices=('high_quality', 'economy'),
help='Invalid indexing technique.')
args = parser.parse_args()
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
try:
dataset = DatasetService.create_empty_dataset(
tenant_id=current_user.current_tenant_id,
name=args['name'],
indexing_technique=args['indexing_technique'],
account=current_user
)
except services.errors.dataset.DatasetNameDuplicateError:
raise DatasetNameDuplicateError()
return marshal(dataset, dataset_detail_fields), 201
class DatasetApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
try:
DatasetService.check_dataset_permission(
dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
return marshal(dataset, dataset_detail_fields), 200
@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id):
dataset_id_str = str(dataset_id)
parser = reqparse.RequestParser()
parser.add_argument('name', nullable=False,
help='Name must be between 1 and 40 characters.',
type=_validate_name)
parser.add_argument('description',
location='json', store_missing=False,
type=_validate_description_length)
parser.add_argument('indexing_technique', type=str, location='json',
choices=('high_quality', 'economy'),
help='Invalid indexing technique.')
parser.add_argument('permission', type=str, location='json', choices=(
'only_me', 'all_team_members'), help='Invalid permission.')
args = parser.parse_args()
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
dataset = DatasetService.update_dataset(
dataset_id_str, args, current_user)
if dataset is None:
raise NotFound("Dataset not found.")
return marshal(dataset, dataset_detail_fields), 200
@setup_required
@login_required
@account_initialization_required
def delete(self, dataset_id):
dataset_id_str = str(dataset_id)
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
if DatasetService.delete_dataset(dataset_id_str, current_user):
return {'result': 'success'}, 204
else:
raise NotFound("Dataset not found.")
class DatasetQueryApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
page = request.args.get('page', default=1, type=int)
limit = request.args.get('limit', default=20, type=int)
dataset_queries, total = DatasetService.get_dataset_queries(
dataset_id=dataset.id,
page=page,
per_page=limit
)
response = {
'data': marshal(dataset_queries, dataset_query_detail_fields),
'has_more': len(dataset_queries) == limit,
'limit': limit,
'total': total,
'page': page
}
return response, 200
class DatasetIndexingEstimateApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self):
segment_rule = request.get_json()
file_detail = db.session.query(UploadFile).filter(
UploadFile.tenant_id == current_user.current_tenant_id,
UploadFile.id == segment_rule["file_id"]
).first()
if file_detail is None:
raise NotFound("File not found.")
indexing_runner = IndexingRunner()
response = indexing_runner.indexing_estimate(file_detail, segment_rule['process_rule'])
return response, 200
class DatasetRelatedAppListApi(Resource):
app_detail_kernel_fields = {
'id': fields.String,
'name': fields.String,
'mode': fields.String,
'icon': fields.String,
'icon_background': fields.String,
}
related_app_list = {
'data': fields.List(fields.Nested(app_detail_kernel_fields)),
'total': fields.Integer,
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(related_app_list)
def get(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
app_dataset_joins = DatasetService.get_related_apps(dataset.id)
related_apps = []
for app_dataset_join in app_dataset_joins:
app_model = app_dataset_join.app
if app_model:
related_apps.append(app_model)
return {
'data': related_apps,
'total': len(related_apps)
}, 200
api.add_resource(DatasetListApi, '/datasets')
api.add_resource(DatasetApi, '/datasets/<uuid:dataset_id>')
api.add_resource(DatasetQueryApi, '/datasets/<uuid:dataset_id>/queries')
api.add_resource(DatasetIndexingEstimateApi, '/datasets/file-indexing-estimate')
api.add_resource(DatasetRelatedAppListApi, '/datasets/<uuid:dataset_id>/related-apps')
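# Usage sketch (illustrative): creating an empty dataset and listing the first
# page. Requires an admin/owner session; the cookie value is hypothetical.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
cookies = {'session': '<console-session-cookie>'}

resp = requests.post(f'{BASE}/datasets',
                     json={'name': 'Product FAQ', 'indexing_technique': 'high_quality'},
                     cookies=cookies)
dataset = resp.json()  # 201 with the dataset_detail_fields payload
page = requests.get(f'{BASE}/datasets', params={'page': 1, 'limit': 20},
                    cookies=cookies).json()
print(dataset.get('id'), page['total'])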

View File

@ -0,0 +1,682 @@
# -*- coding:utf-8 -*-
import random
from datetime import datetime
from flask import request
from flask_login import login_required, current_user
from flask_restful import Resource, fields, marshal, marshal_with, reqparse
from sqlalchemy import desc, asc
from werkzeug.exceptions import NotFound, Forbidden
import services
from controllers.console import api
from controllers.console.app.error import ProviderNotInitializeError
from controllers.console.datasets.error import DocumentAlreadyFinishedError, InvalidActionError, DocumentIndexingError, \
InvalidMetadataError, ArchivedDocumentImmutableError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.indexing_runner import IndexingRunner
from core.llm.error import ProviderTokenNotInitError
from extensions.ext_redis import redis_client
from libs.helper import TimestampField
from extensions.ext_database import db
from models.dataset import DatasetProcessRule, Dataset
from models.dataset import Document, DocumentSegment
from models.model import UploadFile
from services.dataset_service import DocumentService, DatasetService
from tasks.add_document_to_index_task import add_document_to_index_task
from tasks.remove_document_from_index_task import remove_document_from_index_task
dataset_fields = {
'id': fields.String,
'name': fields.String,
'description': fields.String,
'permission': fields.String,
'data_source_type': fields.String,
'indexing_technique': fields.String,
'created_by': fields.String,
'created_at': TimestampField,
}
document_fields = {
'id': fields.String,
'position': fields.Integer,
'data_source_type': fields.String,
'data_source_info': fields.Raw(attribute='data_source_info_dict'),
'dataset_process_rule_id': fields.String,
'name': fields.String,
'created_from': fields.String,
'created_by': fields.String,
'created_at': TimestampField,
'tokens': fields.Integer,
'indexing_status': fields.String,
'error': fields.String,
'enabled': fields.Boolean,
'disabled_at': TimestampField,
'disabled_by': fields.String,
'archived': fields.Boolean,
'display_status': fields.String,
'word_count': fields.Integer,
'hit_count': fields.Integer,
}
class DocumentResource(Resource):
def get_document(self, dataset_id: str, document_id: str) -> Document:
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
document = DocumentService.get_document(dataset_id, document_id)
if not document:
raise NotFound('Document not found.')
if document.tenant_id != current_user.current_tenant_id:
raise Forbidden('No permission.')
return document
class GetProcessRuleApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self):
req_data = request.args
document_id = req_data.get('document_id')
if document_id:
# get the latest process rule
document = Document.query.get_or_404(document_id)
dataset = DatasetService.get_dataset(document.dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# get the latest process rule
dataset_process_rule = db.session.query(DatasetProcessRule). \
filter(DatasetProcessRule.dataset_id == document.dataset_id). \
order_by(DatasetProcessRule.created_at.desc()). \
limit(1). \
one_or_none()
mode = dataset_process_rule.mode
rules = dataset_process_rule.rules_dict
else:
mode = DocumentService.DEFAULT_RULES['mode']
rules = DocumentService.DEFAULT_RULES['rules']
return {
'mode': mode,
'rules': rules
}
class DatasetDocumentListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id):
dataset_id = str(dataset_id)
page = request.args.get('page', default=1, type=int)
limit = request.args.get('limit', default=20, type=int)
search = request.args.get('search', default=None, type=str)
sort = request.args.get('sort', default='-created_at', type=str)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
query = Document.query.filter_by(
dataset_id=str(dataset_id), tenant_id=current_user.current_tenant_id)
if search:
search = f'%{search}%'
query = query.filter(Document.name.like(search))
if sort.startswith('-'):
sort_logic = desc
sort = sort[1:]
else:
sort_logic = asc
if sort == 'hit_count':
sub_query = db.select(DocumentSegment.document_id,
db.func.sum(DocumentSegment.hit_count).label("total_hit_count")) \
.group_by(DocumentSegment.document_id) \
.subquery()
query = query.outerjoin(sub_query, sub_query.c.document_id == Document.id) \
.order_by(sort_logic(db.func.coalesce(sub_query.c.total_hit_count, 0)))
elif sort == 'created_at':
query = query.order_by(sort_logic(Document.created_at))
else:
query = query.order_by(desc(Document.created_at))
paginated_documents = query.paginate(
page=page, per_page=limit, max_per_page=100, error_out=False)
documents = paginated_documents.items
response = {
'data': marshal(documents, document_fields),
'has_more': len(documents) == limit,
'limit': limit,
'total': paginated_documents.total,
'page': page
}
return response
@setup_required
@login_required
@account_initialization_required
@marshal_with(document_fields)
def post(self, dataset_id):
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, nullable=False,
location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
parser.add_argument('duplicate', type=bool, nullable=False, location='json')
args = parser.parse_args()
if not dataset.indexing_technique and not args['indexing_technique']:
raise ValueError('indexing_technique is required.')
# validate args
DocumentService.document_create_args_validate(args)
try:
document = DocumentService.save_document_with_dataset_id(dataset, args, current_user)
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
return document
class DatasetInitApi(Resource):
dataset_and_document_fields = {
'dataset': fields.Nested(dataset_fields),
'document': fields.Nested(document_fields)
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(dataset_and_document_fields)
def post(self):
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
parser = reqparse.RequestParser()
parser.add_argument('indexing_technique', type=str, choices=Dataset.INDEXING_TECHNIQUE_LIST, required=True,
nullable=False, location='json')
parser.add_argument('data_source', type=dict, required=True, nullable=True, location='json')
parser.add_argument('process_rule', type=dict, required=True, nullable=True, location='json')
args = parser.parse_args()
# validate args
DocumentService.document_create_args_validate(args)
try:
dataset, document = DocumentService.save_document_without_dataset_id(
tenant_id=current_user.current_tenant_id,
document_data=args,
account=current_user
)
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
response = {
'dataset': dataset,
'document': document
}
return response
class DocumentIndexingEstimateApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
if document.indexing_status in ['completed', 'error']:
raise DocumentAlreadyFinishedError()
data_process_rule = document.dataset_process_rule
data_process_rule_dict = data_process_rule.to_dict()
response = {
"tokens": 0,
"total_price": 0,
"currency": "USD",
"total_segments": 0,
"preview": []
}
if document.data_source_type == 'upload_file':
data_source_info = document.data_source_info_dict
if data_source_info and 'upload_file_id' in data_source_info:
file_id = data_source_info['upload_file_id']
file = db.session.query(UploadFile).filter(
UploadFile.tenant_id == document.tenant_id,
UploadFile.id == file_id
).first()
# raise error if file not found
if not file:
raise NotFound('File not found.')
indexing_runner = IndexingRunner()
response = indexing_runner.indexing_estimate(file, data_process_rule_dict)
return response
class DocumentIndexingStatusApi(DocumentResource):
document_status_fields = {
'id': fields.String,
'indexing_status': fields.String,
'processing_started_at': TimestampField,
'parsing_completed_at': TimestampField,
'cleaning_completed_at': TimestampField,
'splitting_completed_at': TimestampField,
'completed_at': TimestampField,
'paused_at': TimestampField,
'error': fields.String,
'stopped_at': TimestampField,
'completed_segments': fields.Integer,
'total_segments': fields.Integer,
}
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
completed_segments = DocumentSegment.query \
.filter(DocumentSegment.completed_at.isnot(None),
DocumentSegment.document_id == str(document_id)) \
.count()
total_segments = DocumentSegment.query \
.filter_by(document_id=str(document_id)) \
.count()
document.completed_segments = completed_segments
document.total_segments = total_segments
return marshal(document, self.document_status_fields)
class DocumentDetailApi(DocumentResource):
METADATA_CHOICES = {'all', 'only', 'without'}
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
metadata = request.args.get('metadata', 'all')
if metadata not in self.METADATA_CHOICES:
raise InvalidMetadataError(f'Invalid metadata value: {metadata}')
if metadata == 'only':
response = {
'id': document.id,
'doc_type': document.doc_type,
'doc_metadata': document.doc_metadata
}
elif metadata == 'without':
process_rules = DatasetService.get_process_rules(dataset_id)
data_source_info = document.data_source_detail_dict
response = {
'id': document.id,
'position': document.position,
'data_source_type': document.data_source_type,
'data_source_info': data_source_info,
'dataset_process_rule_id': document.dataset_process_rule_id,
'dataset_process_rule': process_rules,
'name': document.name,
'created_from': document.created_from,
'created_by': document.created_by,
'created_at': document.created_at.timestamp(),
'tokens': document.tokens,
'indexing_status': document.indexing_status,
'completed_at': int(document.completed_at.timestamp()) if document.completed_at else None,
'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None,
'indexing_latency': document.indexing_latency,
'error': document.error,
'enabled': document.enabled,
'disabled_at': int(document.disabled_at.timestamp()) if document.disabled_at else None,
'disabled_by': document.disabled_by,
'archived': document.archived,
'segment_count': document.segment_count,
'average_segment_length': document.average_segment_length,
'hit_count': document.hit_count,
'display_status': document.display_status
}
else:
process_rules = DatasetService.get_process_rules(dataset_id)
data_source_info = document.data_source_detail_dict_()
response = {
'id': document.id,
'position': document.position,
'data_source_type': document.data_source_type,
'data_source_info': data_source_info,
'dataset_process_rule_id': document.dataset_process_rule_id,
'dataset_process_rule': process_rules,
'name': document.name,
'created_from': document.created_from,
'created_by': document.created_by,
'created_at': document.created_at.timestamp(),
'tokens': document.tokens,
'indexing_status': document.indexing_status,
'completed_at': int(document.completed_at.timestamp()) if document.completed_at else None,
'updated_at': int(document.updated_at.timestamp()) if document.updated_at else None,
'indexing_latency': document.indexing_latency,
'error': document.error,
'enabled': document.enabled,
'disabled_at': int(document.disabled_at.timestamp()) if document.disabled_at else None,
'disabled_by': document.disabled_by,
'archived': document.archived,
'doc_type': document.doc_type,
'doc_metadata': document.doc_metadata,
'segment_count': document.segment_count,
'average_segment_length': document.average_segment_length,
'hit_count': document.hit_count,
'display_status': document.display_status
}
return response, 200
class DocumentProcessingApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id, document_id, action):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
if action == "pause":
if document.indexing_status != "indexing":
raise InvalidActionError('Document not in indexing state.')
document.paused_by = current_user.id
document.paused_at = datetime.utcnow()
document.is_paused = True
db.session.commit()
elif action == "resume":
if document.indexing_status not in ["paused", "error"]:
raise InvalidActionError('Document not in paused or error state.')
document.paused_by = None
document.paused_at = None
document.is_paused = False
db.session.commit()
else:
raise InvalidActionError()
return {'result': 'success'}, 200
class DocumentDeleteApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def delete(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
try:
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError('Cannot delete document during indexing.')
return {'result': 'success'}, 204
class DocumentMetadataApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def put(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
req_data = request.get_json()
doc_type = req_data.get('doc_type')
doc_metadata = req_data.get('doc_metadata')
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
if doc_type is None or doc_metadata is None:
raise ValueError('Both doc_type and doc_metadata must be provided.')
if doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise ValueError('Invalid doc_type.')
if not isinstance(doc_metadata, dict):
raise ValueError('doc_metadata must be a dictionary.')
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]
document.doc_metadata = {}
for key, value_type in metadata_schema.items():
value = doc_metadata.get(key)
if value is not None and isinstance(value, value_type):
document.doc_metadata[key] = value
document.doc_type = doc_type
document.updated_at = datetime.utcnow()
db.session.commit()
return {'result': 'success', 'message': 'Document metadata updated.'}, 200
class DocumentStatusApi(DocumentResource):
@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id, document_id, action):
dataset_id = str(dataset_id)
document_id = str(document_id)
document = self.get_document(dataset_id, document_id)
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
indexing_cache_key = 'document_{}_indexing'.format(document.id)
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
raise InvalidActionError("Document is being indexed, please try again later")
if action == "enable":
if document.enabled:
raise InvalidActionError('Document already enabled.')
document.enabled = True
document.disabled_at = None
document.disabled_by = None
document.updated_at = datetime.utcnow()
db.session.commit()
# Set cache to prevent indexing the same document multiple times
redis_client.setex(indexing_cache_key, 600, 1)
add_document_to_index_task.delay(document_id)
return {'result': 'success'}, 200
elif action == "disable":
if not document.enabled:
raise InvalidActionError('Document already disabled.')
document.enabled = False
document.disabled_at = datetime.utcnow()
document.disabled_by = current_user.id
document.updated_at = datetime.utcnow()
db.session.commit()
# Set cache to prevent indexing the same document multiple times
redis_client.setex(indexing_cache_key, 600, 1)
remove_document_from_index_task.delay(document_id)
return {'result': 'success'}, 200
elif action == "archive":
if document.archived:
raise InvalidActionError('Document already archived.')
document.archived = True
document.archived_at = datetime.utcnow()
document.archived_by = current_user.id
document.updated_at = datetime.utcnow()
db.session.commit()
if document.enabled:
# Set cache to prevent indexing the same document multiple times
redis_client.setex(indexing_cache_key, 600, 1)
remove_document_from_index_task.delay(document_id)
return {'result': 'success'}, 200
else:
raise InvalidActionError()
class DocumentPauseApi(DocumentResource):
def patch(self, dataset_id, document_id):
"""pause document."""
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
document = DocumentService.get_document(dataset.id, document_id)
# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")
# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()
try:
# pause document
DocumentService.pause_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError('Cannot pause completed document.')
return {'result': 'success'}, 204
class DocumentRecoverApi(DocumentResource):
def patch(self, dataset_id, document_id):
"""recover document."""
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
document = DocumentService.get_document(dataset.id, document_id)
# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")
# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()
try:
# recover document
DocumentService.recover_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError('Document is not in paused status.')
return {'result': 'success'}, 204
api.add_resource(GetProcessRuleApi, '/datasets/process-rule')
api.add_resource(DatasetDocumentListApi,
'/datasets/<uuid:dataset_id>/documents')
api.add_resource(DatasetInitApi,
'/datasets/init')
api.add_resource(DocumentIndexingEstimateApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate')
api.add_resource(DocumentIndexingStatusApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status')
api.add_resource(DocumentDetailApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>')
api.add_resource(DocumentProcessingApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/<string:action>')
api.add_resource(DocumentDeleteApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>')
api.add_resource(DocumentMetadataApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/metadata')
api.add_resource(DocumentStatusApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/status/<string:action>')
api.add_resource(DocumentPauseApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/pause')
api.add_resource(DocumentRecoverApi, '/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/processing/resume')
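# Usage sketch (illustrative): toggling a document via the status endpoint.
# Each toggle sets a 10-minute redis guard key, so an immediate second call is
# rejected with InvalidActionError while (re)indexing runs asynchronously.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
dataset_id = '33333333-3333-3333-3333-333333333333'   # hypothetical
document_id = '44444444-4444-4444-4444-444444444444'  # hypothetical

resp = requests.patch(
    f'{BASE}/datasets/{dataset_id}/documents/{document_id}/status/disable',
    cookies={'session': '<console-session-cookie>'},
)
print(resp.status_code, resp.json())  # 200 {'result': 'success'} on success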

View File

@ -0,0 +1,203 @@
# -*- coding:utf-8 -*-
from datetime import datetime
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal
from werkzeug.exceptions import NotFound, Forbidden
import services
from controllers.console import api
from controllers.console.datasets.error import InvalidActionError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.dataset import DocumentSegment
from libs.helper import TimestampField
from services.dataset_service import DatasetService, DocumentService
from tasks.add_segment_to_index_task import add_segment_to_index_task
from tasks.remove_segment_from_index_task import remove_segment_from_index_task
segment_fields = {
'id': fields.String,
'position': fields.Integer,
'document_id': fields.String,
'content': fields.String,
'word_count': fields.Integer,
'tokens': fields.Integer,
'keywords': fields.List(fields.String),
'index_node_id': fields.String,
'index_node_hash': fields.String,
'hit_count': fields.Integer,
'enabled': fields.Boolean,
'disabled_at': TimestampField,
'disabled_by': fields.String,
'status': fields.String,
'created_by': fields.String,
'created_at': TimestampField,
'indexing_at': TimestampField,
'completed_at': TimestampField,
'error': fields.String,
'stopped_at': TimestampField
}
segment_list_response = {
'data': fields.List(fields.Nested(segment_fields)),
'has_more': fields.Boolean,
'limit': fields.Integer
}
class DatasetDocumentSegmentListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, dataset_id, document_id):
dataset_id = str(dataset_id)
document_id = str(document_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
document = DocumentService.get_document(dataset_id, document_id)
if not document:
raise NotFound('Document not found.')
parser = reqparse.RequestParser()
parser.add_argument('last_id', type=str, default=None, location='args')
parser.add_argument('limit', type=int, default=20, location='args')
parser.add_argument('status', type=str,
action='append', default=[], location='args')
parser.add_argument('hit_count_gte', type=int,
default=None, location='args')
parser.add_argument('enabled', type=str, default='all', location='args')
args = parser.parse_args()
last_id = args['last_id']
limit = min(args['limit'], 100)
status_list = args['status']
hit_count_gte = args['hit_count_gte']
query = DocumentSegment.query.filter(
DocumentSegment.document_id == str(document_id),
DocumentSegment.tenant_id == current_user.current_tenant_id
)
if last_id is not None:
last_segment = DocumentSegment.query.get(str(last_id))
if last_segment:
query = query.filter(
DocumentSegment.position > last_segment.position)
else:
return {'data': [], 'has_more': False, 'limit': limit}, 200
if status_list:
query = query.filter(DocumentSegment.status.in_(status_list))
if hit_count_gte is not None:
query = query.filter(DocumentSegment.hit_count >= hit_count_gte)
if args['enabled'].lower() != 'all':
if args['enabled'].lower() == 'true':
query = query.filter(DocumentSegment.enabled == True)
elif args['enabled'].lower() == 'false':
query = query.filter(DocumentSegment.enabled == False)
total = query.count()
segments = query.order_by(DocumentSegment.position).limit(limit + 1).all()
has_more = False
if len(segments) > limit:
has_more = True
segments = segments[:-1]
return {
'data': marshal(segments, segment_fields),
'has_more': has_more,
'limit': limit,
'total': total
}, 200
class DatasetDocumentSegmentApi(Resource):
@setup_required
@login_required
@account_initialization_required
def patch(self, dataset_id, segment_id, action):
dataset_id = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id)
if not dataset:
raise NotFound('Dataset not found.')
# The role of the current user in the ta table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
segment = DocumentSegment.query.filter(
DocumentSegment.id == str(segment_id),
DocumentSegment.tenant_id == current_user.current_tenant_id
).first()
if not segment:
raise NotFound('Segment not found.')
document_indexing_cache_key = 'document_{}_indexing'.format(segment.document_id)
cache_result = redis_client.get(document_indexing_cache_key)
if cache_result is not None:
raise InvalidActionError("Document is being indexed, please try again later")
indexing_cache_key = 'segment_{}_indexing'.format(segment.id)
cache_result = redis_client.get(indexing_cache_key)
if cache_result is not None:
raise InvalidActionError("Segment is being indexed, please try again later")
if action == "enable":
if segment.enabled:
raise InvalidActionError("Segment is already enabled.")
segment.enabled = True
segment.disabled_at = None
segment.disabled_by = None
db.session.commit()
# Set cache to prevent indexing the same segment multiple times
redis_client.setex(indexing_cache_key, 600, 1)
add_segment_to_index_task.delay(segment.id)
return {'result': 'success'}, 200
elif action == "disable":
if not segment.enabled:
raise InvalidActionError("Segment is already disabled.")
segment.enabled = False
segment.disabled_at = datetime.utcnow()
segment.disabled_by = current_user.id
db.session.commit()
# Set cache to prevent indexing the same segment multiple times
redis_client.setex(indexing_cache_key, 600, 1)
remove_segment_from_index_task.delay(segment.id)
return {'result': 'success'}, 200
else:
raise InvalidActionError()
api.add_resource(DatasetDocumentSegmentListApi,
'/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments')
api.add_resource(DatasetDocumentSegmentApi,
'/datasets/<uuid:dataset_id>/segments/<uuid:segment_id>/<string:action>')
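# Usage sketch (illustrative): the segment list is cursor-paginated. Pass the
# id of the last segment you received as `last_id` (the server fetches
# limit + 1 rows to compute has_more) and stop once has_more is False.
# The IDs and cookie are hypothetical.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
dataset_id = '33333333-3333-3333-3333-333333333333'   # hypothetical
document_id = '44444444-4444-4444-4444-444444444444'  # hypothetical
cookies = {'session': '<console-session-cookie>'}

last_id, has_more = None, True
while has_more:
    params = {'limit': 20}
    if last_id:
        params['last_id'] = last_id
    page = requests.get(
        f'{BASE}/datasets/{dataset_id}/documents/{document_id}/segments',
        params=params, cookies=cookies,
    ).json()
    for seg in page['data']:
        print(seg['position'], seg['status'])
    has_more = page['has_more']
    if page['data']:
        last_id = page['data'][-1]['id']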

View File

@ -0,0 +1,73 @@
from libs.exception import BaseHTTPException
class NoFileUploadedError(BaseHTTPException):
error_code = 'no_file_uploaded'
description = "No file uploaded."
code = 400
class TooManyFilesError(BaseHTTPException):
error_code = 'too_many_files'
description = "Only one file is allowed."
code = 400
class FileTooLargeError(BaseHTTPException):
error_code = 'file_too_large'
description = "File size exceeded. {message}"
code = 413
class UnsupportedFileTypeError(BaseHTTPException):
error_code = 'unsupported_file_type'
description = "File type not allowed."
code = 415
class HighQualityDatasetOnlyError(BaseHTTPException):
error_code = 'high_quality_dataset_only'
description = "High quality dataset only."
code = 400
class DatasetNotInitializedError(BaseHTTPException):
error_code = 'dataset_not_initialized'
description = "Dataset not initialized."
code = 400
class ArchivedDocumentImmutableError(BaseHTTPException):
error_code = 'archived_document_immutable'
description = "Cannot process an archived document."
code = 403
class DatasetNameDuplicateError(BaseHTTPException):
error_code = 'dataset_name_duplicate'
description = "Dataset name already exists."
code = 409
class InvalidActionError(BaseHTTPException):
error_code = 'invalid_action'
description = "Invalid action."
code = 400
class DocumentAlreadyFinishedError(BaseHTTPException):
error_code = 'document_already_finished'
description = "Document already finished."
code = 400
class DocumentIndexingError(BaseHTTPException):
error_code = 'document_indexing'
description = "Document indexing."
code = 400
class InvalidMetadataError(BaseHTTPException):
error_code = 'invalid_metadata'
description = "Invalid metadata."
code = 400

View File

@ -0,0 +1,147 @@
import datetime
import hashlib
import tempfile
import time
import uuid
from pathlib import Path
from cachetools import TTLCache
from flask import request, current_app
from flask_login import login_required, current_user
from flask_restful import Resource, marshal_with, fields
from werkzeug.exceptions import NotFound
from controllers.console import api
from controllers.console.datasets.error import NoFileUploadedError, TooManyFilesError, FileTooLargeError, \
UnsupportedFileTypeError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.index.readers.html_parser import HTMLParser
from core.index.readers.pdf_parser import PDFParser
from extensions.ext_storage import storage
from libs.helper import TimestampField
from extensions.ext_database import db
from models.model import UploadFile
cache = TTLCache(maxsize=None, ttl=30)
FILE_SIZE_LIMIT = 15 * 1024 * 1024 # 15MB
ALLOWED_EXTENSIONS = ['txt', 'markdown', 'md', 'pdf', 'html', 'htm']
PREVIEW_WORDS_LIMIT = 3000
class FileApi(Resource):
file_fields = {
'id': fields.String,
'name': fields.String,
'size': fields.Integer,
'extension': fields.String,
'mime_type': fields.String,
'created_by': fields.String,
'created_at': TimestampField,
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(file_fields)
def post(self):
# check the request before touching the file; indexing request.files['file']
# first would raise KeyError before NoFileUploadedError could be raised
if 'file' not in request.files:
raise NoFileUploadedError()
if len(request.files) > 1:
raise TooManyFilesError()
file = request.files['file']
file_content = file.read()
file_size = len(file_content)
if file_size > FILE_SIZE_LIMIT:
message = "({file_size} > {FILE_SIZE_LIMIT})"
raise FileTooLargeError(message)
extension = file.filename.split('.')[-1]
if extension not in ALLOWED_EXTENSIONS:
raise UnsupportedFileTypeError()
# user uuid as file name
file_uuid = str(uuid.uuid4())
file_key = 'upload_files/' + current_user.current_tenant_id + '/' + file_uuid + '.' + extension
# save file to storage
storage.save(file_key, file_content)
# save file to db
config = current_app.config
upload_file = UploadFile(
tenant_id=current_user.current_tenant_id,
storage_type=config['STORAGE_TYPE'],
key=file_key,
name=file.filename,
size=file_size,
extension=extension,
mime_type=file.mimetype,
created_by=current_user.id,
created_at=datetime.datetime.utcnow(),
used=False,
hash=hashlib.sha3_256(file_content).hexdigest()
)
db.session.add(upload_file)
db.session.commit()
return upload_file, 201
class FilePreviewApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self, file_id):
file_id = str(file_id)
key = file_id + request.path
cached_response = cache.get(key)
if cached_response and time.time() - cached_response['timestamp'] < cache.ttl:
return cached_response['response']
upload_file = db.session.query(UploadFile) \
.filter(UploadFile.id == file_id) \
.first()
if not upload_file:
raise NotFound("File not found")
# extract text from file
extension = upload_file.extension
if extension not in ALLOWED_EXTENSIONS:
raise UnsupportedFileTypeError()
with tempfile.TemporaryDirectory() as temp_dir:
suffix = Path(upload_file.key).suffix
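# note: _get_candidate_names() is a private tempfile helper, used here only
# to build a throwaway file name inside the temporary directory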
filepath = f"{temp_dir}/{next(tempfile._get_candidate_names())}{suffix}"
storage.download(upload_file.key, filepath)
if extension == 'pdf':
parser = PDFParser({'upload_file': upload_file})
text = parser.parse_file(Path(filepath))
elif extension in ['html', 'htm']:
# Use BeautifulSoup to extract text
parser = HTMLParser()
text = parser.parse_file(Path(filepath))
else:
# ['txt', 'markdown', 'md']
with open(filepath, "rb") as fp:
data = fp.read()
text = data.decode(encoding='utf-8').strip() if data else ''
text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
return {'content': text}
api.add_resource(FileApi, '/files/upload')
api.add_resource(FilePreviewApi, '/files/<uuid:file_id>/preview')
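# Usage sketch (illustrative): uploading a markdown file within the 15MB limit
# and previewing its extracted text. The file content is hypothetical.
import requests

BASE = 'http://localhost:5001/console/api'  # assumed console API prefix
cookies = {'session': '<console-session-cookie>'}

resp = requests.post(
    f'{BASE}/files/upload',
    files={'file': ('notes.md', b'# Hello\nSome notes.', 'text/markdown')},
    cookies=cookies,
)
uploaded = resp.json()  # 201 with id/name/size/extension/mime_type/...
preview = requests.get(f"{BASE}/files/{uploaded['id']}/preview", cookies=cookies)
print(preview.json()['content'][:80])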

View File

@ -0,0 +1,100 @@
import logging
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, marshal, fields
from werkzeug.exceptions import InternalServerError, NotFound, Forbidden
import services
from controllers.console import api
from controllers.console.datasets.error import HighQualityDatasetOnlyError, DatasetNotInitializedError
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.helper import TimestampField
from services.dataset_service import DatasetService
from services.hit_testing_service import HitTestingService
document_fields = {
'id': fields.String,
'data_source_type': fields.String,
'name': fields.String,
'doc_type': fields.String,
}
segment_fields = {
'id': fields.String,
'position': fields.Integer,
'document_id': fields.String,
'content': fields.String,
'word_count': fields.Integer,
'tokens': fields.Integer,
'keywords': fields.List(fields.String),
'index_node_id': fields.String,
'index_node_hash': fields.String,
'hit_count': fields.Integer,
'enabled': fields.Boolean,
'disabled_at': TimestampField,
'disabled_by': fields.String,
'status': fields.String,
'created_by': fields.String,
'created_at': TimestampField,
'indexing_at': TimestampField,
'completed_at': TimestampField,
'error': fields.String,
'stopped_at': TimestampField,
'document': fields.Nested(document_fields),
}
hit_testing_record_fields = {
'segment': fields.Nested(segment_fields),
'score': fields.Float,
'tsne_position': fields.Raw
}
class HitTestingApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, dataset_id):
dataset_id_str = str(dataset_id)
dataset = DatasetService.get_dataset(dataset_id_str)
if dataset is None:
raise NotFound("Dataset not found.")
try:
DatasetService.check_dataset_permission(dataset, current_user)
except services.errors.account.NoPermissionError as e:
raise Forbidden(str(e))
# only high quality dataset can be used for hit testing
if dataset.indexing_technique != 'high_quality':
raise HighQualityDatasetOnlyError()
parser = reqparse.RequestParser()
parser.add_argument('query', type=str, location='json')
args = parser.parse_args()
query = args['query']
if not query or len(query) > 250:
raise ValueError('Query is required and cannot exceed 250 characters')
try:
response = HitTestingService.retrieve(
dataset=dataset,
query=query,
account=current_user,
limit=10,
)
return {"query": response['query'], 'records': marshal(response['records'], hit_testing_record_fields)}
except services.errors.index.IndexNotInitializedError:
raise DatasetNotInitializedError()
except Exception as e:
logging.exception("Hit testing failed.")
raise InternalServerError(str(e))
api.add_resource(HitTestingApi, '/datasets/<uuid:dataset_id>/hit-testing')
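A hedged example of a hit-testing call under the same assumptions (console prefix, session auth); the dataset id and query are placeholders, and the dataset must use the high_quality indexing technique:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host
session = requests.Session()
session.cookies.set("session", "<your-session-cookie>")

dataset_id = "<dataset-uuid>"
resp = session.post(
    f"{BASE}/datasets/{dataset_id}/hit-testing",
    json={"query": "How do I reset my password?"},  # required, at most 250 characters
)
for record in resp.json()["records"]:
    print(record["score"], record["segment"]["content"][:80])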

View File

@ -0,0 +1,19 @@
from libs.exception import BaseHTTPException
class AlreadySetupError(BaseHTTPException):
error_code = 'already_setup'
description = "Application already setup."
code = 403
class NotSetupError(BaseHTTPException):
error_code = 'not_setup'
description = "Application not setup."
code = 401
class AccountNotLinkTenantError(BaseHTTPException):
error_code = 'account_not_link_tenant'
description = "Account not link tenant."
code = 403

View File

@ -0,0 +1,93 @@
# -*- coding:utf-8 -*-
from functools import wraps
import flask_login
from flask import request, current_app
from flask_restful import Resource, reqparse
from extensions.ext_database import db
from models.model import DifySetup
from services.account_service import AccountService, TenantService, RegisterService
from libs.helper import email, str_len
from libs.password import valid_password
from . import api
from .error import AlreadySetupError, NotSetupError
from .wraps import only_edition_self_hosted
class SetupApi(Resource):
@only_edition_self_hosted
def get(self):
setup_status = get_setup_status()
if setup_status:
return {
'step': 'finished',
'setup_at': setup_status.setup_at.isoformat()
}
return {'step': 'not_start'}
@only_edition_self_hosted
def post(self):
# already set up?
if get_setup_status():
raise AlreadySetupError()
# has any tenant already been created?
tenant_count = TenantService.get_tenant_count()
if tenant_count > 0:
raise AlreadySetupError()
parser = reqparse.RequestParser()
parser.add_argument('email', type=email,
required=True, location='json')
parser.add_argument('name', type=str_len(
30), required=True, location='json')
parser.add_argument('password', type=valid_password,
required=True, location='json')
args = parser.parse_args()
# Register
account = RegisterService.register(
email=args['email'],
name=args['name'],
password=args['password']
)
setup()
# Login
flask_login.login_user(account)
AccountService.update_last_login(account, request)
return {'result': 'success'}, 201
def setup():
dify_setup = DifySetup(
version=current_app.config['CURRENT_VERSION']
)
db.session.add(dify_setup)
db.session.commit()
def setup_required(view):
@wraps(view)
def decorated(*args, **kwargs):
# check setup
if not get_setup_status():
raise NotSetupError()
return view(*args, **kwargs)
return decorated
def get_setup_status():
if current_app.config['EDITION'] == 'SELF_HOSTED':
return DifySetup.query.first()
else:
return True
api.add_resource(SetupApi, '/setup')
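A sketch of the first-run flow against these endpoints, assuming a SELF_HOSTED edition and the same console prefix as above; all values are placeholders:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host

# check whether setup has been completed
print(requests.get(f"{BASE}/setup").json())  # {'step': 'not_start'} on a fresh install

# run first-time setup; the password must satisfy libs.password.valid_password
resp = requests.post(f"{BASE}/setup", json={
    "email": "admin@example.com",
    "name": "Admin",                    # at most 30 characters (str_len(30))
    "password": "<a-valid-password>",
})
print(resp.status_code, resp.json())    # 201, {'result': 'success'}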

View File

@ -0,0 +1,39 @@
# -*- coding:utf-8 -*-
import json
import logging
import requests
from flask import current_app
from flask_restful import reqparse, Resource
from werkzeug.exceptions import InternalServerError
from . import api
class VersionApi(Resource):
def get(self):
parser = reqparse.RequestParser()
parser.add_argument('current_version', type=str, required=True, location='args')
args = parser.parse_args()
check_update_url = current_app.config['CHECK_UPDATE_URL']
try:
response = requests.get(check_update_url, params={
'current_version': args.get('current_version')
}, timeout=(3, 10))
except Exception as error:
logging.exception("Check update error.")
raise InternalServerError()
content = json.loads(response.content)
return {
'version': content['version'],
'release_date': content['releaseDate'],
'release_notes': content['releaseNotes'],
'can_auto_update': content['canAutoUpdate']
}
api.add_resource(VersionApi, '/version')
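For illustration, the version check can be exercised like this sketch (note the handler renames the upstream camelCase keys to snake_case); host is a placeholder:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host
info = requests.get(f"{BASE}/version", params={"current_version": "0.1.0"}).json()
print(info["version"], info["release_date"], info["can_auto_update"])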

View File

@ -0,0 +1,263 @@
# -*- coding:utf-8 -*-
from datetime import datetime
import pytz
from flask import current_app, request
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, fields, marshal_with
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.workspace.error import AccountAlreadyInitedError, InvalidInvitationCodeError, \
RepeatPasswordNotMatchError
from controllers.console.wraps import account_initialization_required
from libs.helper import TimestampField, supported_language, timezone
from extensions.ext_database import db
from models.account import InvitationCode, AccountIntegrate
from services.account_service import AccountService
account_fields = {
'id': fields.String,
'name': fields.String,
'avatar': fields.String,
'email': fields.String,
'interface_language': fields.String,
'interface_theme': fields.String,
'timezone': fields.String,
'last_login_at': TimestampField,
'last_login_ip': fields.String,
'created_at': TimestampField
}
class AccountInitApi(Resource):
@setup_required
@login_required
def post(self):
account = current_user
if account.status == 'active':
raise AccountAlreadyInitedError()
parser = reqparse.RequestParser()
if current_app.config['EDITION'] == 'CLOUD':
parser.add_argument('invitation_code', type=str, location='json')
parser.add_argument(
'interface_language', type=supported_language, required=True, location='json')
parser.add_argument('timezone', type=timezone,
required=True, location='json')
args = parser.parse_args()
if current_app.config['EDITION'] == 'CLOUD':
if not args['invitation_code']:
raise ValueError('invitation_code is required')
# check invitation code
invitation_code = db.session.query(InvitationCode).filter(
InvitationCode.code == args['invitation_code'],
InvitationCode.status == 'unused',
).first()
if not invitation_code:
raise InvalidInvitationCodeError()
invitation_code.status = 'used'
invitation_code.used_at = datetime.utcnow()
invitation_code.used_by_tenant_id = account.current_tenant_id
invitation_code.used_by_account_id = account.id
account.interface_language = args['interface_language']
account.timezone = args['timezone']
account.interface_theme = 'light'
account.status = 'active'
account.initialized_at = datetime.utcnow()
db.session.commit()
return {'result': 'success'}
class AccountProfileApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def get(self):
return current_user
class AccountNameApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, location='json')
args = parser.parse_args()
# Validate account name length
if len(args['name']) < 3 or len(args['name']) > 30:
raise ValueError(
"Account name must be between 3 and 30 characters.")
updated_account = AccountService.update_account(current_user, name=args['name'])
return updated_account
class AccountAvatarApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('avatar', type=str, required=True, location='json')
args = parser.parse_args()
updated_account = AccountService.update_account(current_user, avatar=args['avatar'])
return updated_account
class AccountInterfaceLanguageApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument(
'interface_language', type=supported_language, required=True, location='json')
args = parser.parse_args()
updated_account = AccountService.update_account(current_user, interface_language=args['interface_language'])
return updated_account
class AccountInterfaceThemeApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('interface_theme', type=str, choices=[
'light', 'dark'], required=True, location='json')
args = parser.parse_args()
updated_account = AccountService.update_account(current_user, interface_theme=args['interface_theme'])
return updated_account
class AccountTimezoneApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('timezone', type=str,
required=True, location='json')
args = parser.parse_args()
# Validate timezone string, e.g. America/New_York, Asia/Shanghai
if args['timezone'] not in pytz.all_timezones:
raise ValueError("Invalid timezone string.")
updated_account = AccountService.update_account(current_user, timezone=args['timezone'])
return updated_account
class AccountPasswordApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_fields)
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('password', type=str,
required=False, location='json')
parser.add_argument('new_password', type=str,
required=True, location='json')
parser.add_argument('repeat_new_password', type=str,
required=True, location='json')
args = parser.parse_args()
if args['new_password'] != args['repeat_new_password']:
raise RepeatPasswordNotMatchError()
AccountService.update_account_password(
current_user, args['password'], args['new_password'])
return {"result": "success"}
class AccountIntegrateApi(Resource):
integrate_fields = {
'provider': fields.String,
'created_at': TimestampField,
'is_bound': fields.Boolean,
'link': fields.String
}
integrate_list_fields = {
'data': fields.List(fields.Nested(integrate_fields)),
}
@setup_required
@login_required
@account_initialization_required
@marshal_with(integrate_list_fields)
def get(self):
account = current_user
account_integrates = db.session.query(AccountIntegrate).filter(
AccountIntegrate.account_id == account.id).all()
base_url = request.url_root.rstrip('/')
oauth_base_path = "/console/api/oauth/login"
providers = ["github", "google"]
integrate_data = []
for provider in providers:
existing_integrate = next((ai for ai in account_integrates if ai.provider == provider), None)
if existing_integrate:
integrate_data.append({
'id': existing_integrate.id,
'provider': provider,
'created_at': existing_integrate.created_at,
'is_bound': True,
'link': None
})
else:
integrate_data.append({
'id': None,
'provider': provider,
'created_at': None,
'is_bound': False,
'link': f'{base_url}{oauth_base_path}/{provider}'
})
return {'data': integrate_data}
# Register API resources
api.add_resource(AccountInitApi, '/account/init')
api.add_resource(AccountProfileApi, '/account/profile')
api.add_resource(AccountNameApi, '/account/name')
api.add_resource(AccountAvatarApi, '/account/avatar')
api.add_resource(AccountInterfaceLanguageApi, '/account/interface-language')
api.add_resource(AccountInterfaceThemeApi, '/account/interface-theme')
api.add_resource(AccountTimezoneApi, '/account/timezone')
api.add_resource(AccountPasswordApi, '/account/password')
api.add_resource(AccountIntegrateApi, '/account/integrates')
# api.add_resource(AccountEmailApi, '/account/email')
# api.add_resource(AccountEmailVerifyApi, '/account/email-verify')
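A sketch of a few account-settings calls, under the same assumptions as earlier (console prefix, authenticated session); all values are placeholders:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host
session = requests.Session()
session.cookies.set("session", "<your-session-cookie>")

# timezone must be a pytz-recognized name; theme is one of 'light'/'dark'
session.post(f"{BASE}/account/timezone", json={"timezone": "America/New_York"})
session.post(f"{BASE}/account/interface-theme", json={"interface_theme": "dark"})

# password change: new_password and repeat_new_password must match
session.post(f"{BASE}/account/password", json={
    "password": "<current-password>",
    "new_password": "<new-password>",
    "repeat_new_password": "<new-password>",
})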

View File

@ -0,0 +1,31 @@
from libs.exception import BaseHTTPException
class RepeatPasswordNotMatchError(BaseHTTPException):
error_code = 'repeat_password_not_match'
description = "New password and repeat password does not match."
code = 400
class ProviderRequestFailedError(BaseHTTPException):
error_code = 'provider_request_failed'
description = None
code = 400
class InvalidInvitationCodeError(BaseHTTPException):
error_code = 'invalid_invitation_code'
description = "Invalid invitation code."
code = 400
class AccountAlreadyInitedError(BaseHTTPException):
error_code = 'account_already_inited'
description = "Account already inited."
code = 400
class AccountNotInitializedError(BaseHTTPException):
error_code = 'account_not_initialized'
description = "Account not initialized."
code = 400

View File

@ -0,0 +1,141 @@
# -*- coding:utf-8 -*-
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, marshal_with, abort, fields, marshal
import services
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from libs.helper import TimestampField
from extensions.ext_database import db
from models.account import Account, TenantAccountJoin
from services.account_service import TenantService, RegisterService
account_fields = {
'id': fields.String,
'name': fields.String,
'avatar': fields.String,
'email': fields.String,
'last_login_at': TimestampField,
'created_at': TimestampField,
'role': fields.String,
'status': fields.String,
}
account_list_fields = {
'accounts': fields.List(fields.Nested(account_fields))
}
class MemberListApi(Resource):
"""List all members of current tenant."""
@setup_required
@login_required
@account_initialization_required
@marshal_with(account_list_fields)
def get(self):
members = TenantService.get_tenant_members(current_user.current_tenant)
return {'result': 'success', 'accounts': members}, 200
class MemberInviteEmailApi(Resource):
"""Invite a new member by email."""
@setup_required
@login_required
@account_initialization_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('email', type=str, required=True, location='json')
parser.add_argument('role', type=str, required=True, location='json')  # a default is meaningless with required=True
args = parser.parse_args()
invitee_email = args['email']
invitee_role = args['role']
if invitee_role not in ['admin', 'normal']:
return {'code': 'invalid-role', 'message': 'Invalid role'}, 400
inviter = current_user
try:
RegisterService.invite_new_member(inviter.current_tenant, invitee_email, role=invitee_role, inviter=inviter)
account_with_role = db.session.query(Account, TenantAccountJoin.role).join(
TenantAccountJoin, Account.id == TenantAccountJoin.account_id
).filter(Account.email == args['email']).first()
account, role = account_with_role
account = marshal(account, account_fields)
account['role'] = role
except services.errors.account.CannotOperateSelfError as e:
return {'code': 'cannot-operate-self', 'message': str(e)}, 400
except services.errors.account.NoPermissionError as e:
return {'code': 'forbidden', 'message': str(e)}, 403
except services.errors.account.AccountAlreadyInTenantError as e:
return {'code': 'email-taken', 'message': str(e)}, 409
except Exception as e:
return {'code': 'unexpected-error', 'message': str(e)}, 500
# todo:413
return {'result': 'success', 'account': account}, 201
class MemberCancelInviteApi(Resource):
"""Cancel an invitation by member id."""
@setup_required
@login_required
@account_initialization_required
def delete(self, member_id):
member = Account.query.get(str(member_id))
if not member:
abort(404)
try:
TenantService.remove_member_from_tenant(current_user.current_tenant, member, current_user)
except services.errors.account.CannotOperateSelfError as e:
return {'code': 'cannot-operate-self', 'message': str(e)}, 400
except services.errors.account.NoPermissionError as e:
return {'code': 'forbidden', 'message': str(e)}, 403
except services.errors.account.MemberNotInTenantError as e:
return {'code': 'member-not-found', 'message': str(e)}, 404
except Exception as e:
raise ValueError(str(e))
return {'result': 'success'}, 204
class MemberUpdateRoleApi(Resource):
"""Update member role."""
@setup_required
@login_required
@account_initialization_required
def put(self, member_id):
parser = reqparse.RequestParser()
parser.add_argument('role', type=str, required=True, location='json')
args = parser.parse_args()
new_role = args['role']
if new_role not in ['admin', 'normal', 'owner']:
return {'code': 'invalid-role', 'message': 'Invalid role'}, 400
member = Account.query.get(str(member_id))
if not member:
abort(404)
try:
TenantService.update_member_role(current_user.current_tenant, member, new_role, current_user)
except Exception as e:
raise ValueError(str(e))
# todo: 403
return {'result': 'success'}
api.add_resource(MemberListApi, '/workspaces/current/members')
api.add_resource(MemberInviteEmailApi, '/workspaces/current/members/invite-email')
api.add_resource(MemberCancelInviteApi, '/workspaces/current/members/<uuid:member_id>')
api.add_resource(MemberUpdateRoleApi, '/workspaces/current/members/<uuid:member_id>/update-role')
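An invite call might look like the following sketch (console prefix and session auth assumed); role must be 'admin' or 'normal', and a 409 signals the email already belongs to the tenant:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host
session = requests.Session()
session.cookies.set("session", "<your-session-cookie>")

resp = session.post(f"{BASE}/workspaces/current/members/invite-email",
                    json={"email": "teammate@example.com", "role": "normal"})
print(resp.status_code, resp.json())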

View File

@ -0,0 +1,246 @@
# -*- coding:utf-8 -*-
import base64
import json
import logging
from flask_login import login_required, current_user
from flask_restful import Resource, reqparse, abort
from werkzeug.exceptions import Forbidden
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.wraps import account_initialization_required
from core.llm.provider.errors import ValidateFailedError
from extensions.ext_database import db
from libs import rsa
from models.provider import Provider, ProviderType, ProviderName
from services.provider_service import ProviderService
class ProviderListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self):
"""
If the provider type is AZURE_OPENAI, decode and return azure_api_type, azure_api_version,
azure_api_base and azure_api_key as an object; azure_api_key shows its first 6 characters
and last 2 characters in plaintext and masks the rest with *.
For any other provider type, decode and return the token field directly, obfuscated the same
way: first 6 and last 2 characters in plaintext, the rest replaced by *.
"""
tenant_id = current_user.current_tenant_id
ProviderService.init_supported_provider(current_user.current_tenant, "cloud")
providers = Provider.query.filter_by(tenant_id=tenant_id).all()
provider_list = [
{
'provider_name': p.provider_name,
'provider_type': p.provider_type,
'is_valid': p.is_valid,
'last_used': p.last_used,
'is_enabled': p.is_enabled,
**({
'quota_type': p.quota_type,
'quota_limit': p.quota_limit,
'quota_used': p.quota_used
} if p.provider_type == ProviderType.SYSTEM.value else {}),
'token': ProviderService.get_obfuscated_api_key(current_user.current_tenant,
ProviderName(p.provider_name))
}
for p in providers
]
return provider_list
class ProviderTokenApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, provider):
if provider not in [p.value for p in ProviderName]:
abort(404)
# the current user's role in the tenant-account join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
logging.error(
f'User {current_user.id} is not authorized to update provider token, current_role is {current_user.current_tenant.current_role}')
raise Forbidden()
parser = reqparse.RequestParser()
parser.add_argument('token', type=ProviderService.get_token_type(
tenant=current_user.current_tenant,
provider_name=ProviderName(provider)
), required=True, nullable=False, location='json')
args = parser.parse_args()
if not args['token']:
raise ValueError('Token is empty')
try:
ProviderService.validate_provider_configs(
tenant=current_user.current_tenant,
provider_name=ProviderName(provider),
configs=args['token']
)
token_is_valid = True
except ValidateFailedError:
token_is_valid = False
tenant = current_user.current_tenant
base64_encrypted_token = ProviderService.get_encrypted_token(
tenant=current_user.current_tenant,
provider_name=ProviderName(provider),
configs=args['token']
)
provider_model = Provider.query.filter_by(tenant_id=tenant.id, provider_name=provider,
provider_type=ProviderType.CUSTOM.value).first()
# Only allow updating token for CUSTOM provider type
if provider_model:
provider_model.encrypted_config = base64_encrypted_token
provider_model.is_valid = token_is_valid
else:
provider_model = Provider(tenant_id=tenant.id, provider_name=provider,
provider_type=ProviderType.CUSTOM.value,
encrypted_config=base64_encrypted_token,
is_valid=token_is_valid)
db.session.add(provider_model)
db.session.commit()
if provider in [ProviderName.ANTHROPIC.value, ProviderName.AZURE_OPENAI.value, ProviderName.COHERE.value,
ProviderName.HUGGINGFACEHUB.value]:
return {'result': 'success', 'warning': 'MOCK: This provider is not supported yet.'}, 201
return {'result': 'success'}, 201
class ProviderTokenValidateApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self, provider):
if provider not in [p.value for p in ProviderName]:
abort(404)
parser = reqparse.RequestParser()
parser.add_argument('token', type=ProviderService.get_token_type(
tenant=current_user.current_tenant,
provider_name=ProviderName(provider)
), required=True, nullable=False, location='json')
args = parser.parse_args()
# todo: remove this when the provider is supported
if provider in [ProviderName.ANTHROPIC.value, ProviderName.AZURE_OPENAI.value, ProviderName.COHERE.value,
ProviderName.HUGGINGFACEHUB.value]:
return {'result': 'success', 'warning': 'MOCK: This provider is not supported yet.'}
result = True
error = None
try:
ProviderService.validate_provider_configs(
tenant=current_user.current_tenant,
provider_name=ProviderName(provider),
configs=args['token']
)
except ValidateFailedError as e:
result = False
error = str(e)
response = {'result': 'success' if result else 'error'}
if not result:
response['error'] = error
return response
class ProviderSystemApi(Resource):
@setup_required
@login_required
@account_initialization_required
def put(self, provider):
if provider not in [p.value for p in ProviderName]:
abort(404)
parser = reqparse.RequestParser()
parser.add_argument('is_enabled', type=bool, required=True, location='json')
args = parser.parse_args()
tenant = current_user.current_tenant
provider_model = Provider.query.filter_by(tenant_id=tenant.id, provider_name=provider).first()
if provider_model and provider_model.provider_type == ProviderType.SYSTEM.value:
provider_model.is_valid = args['is_enabled']
db.session.commit()
elif not provider_model:
ProviderService.create_system_provider(tenant, provider, args['is_enabled'])
else:
abort(403)
return {'result': 'success'}
@setup_required
@login_required
@account_initialization_required
def get(self, provider):
if provider not in [p.value for p in ProviderName]:
abort(404)
# the current user's role in the tenant-account join table must be admin or owner
if current_user.current_tenant.current_role not in ['admin', 'owner']:
raise Forbidden()
provider_model = db.session.query(Provider).filter(Provider.tenant_id == current_user.current_tenant_id,
Provider.provider_name == provider,
Provider.provider_type == ProviderType.SYSTEM.value).first()
system_model = None
if provider_model:
system_model = {
'result': 'success',
'provider': {
'provider_name': provider_model.provider_name,
'provider_type': provider_model.provider_type,
'is_valid': provider_model.is_valid,
'last_used': provider_model.last_used,
'is_enabled': provider_model.is_enabled,
'quota_type': provider_model.quota_type,
'quota_limit': provider_model.quota_limit,
'quota_used': provider_model.quota_used
}
}
else:
abort(404)
return system_model
api.add_resource(ProviderTokenApi, '/providers/<provider>/token',
endpoint='current_providers_token') # Deprecated
api.add_resource(ProviderTokenValidateApi, '/providers/<provider>/token-validate',
endpoint='current_providers_token_validate') # Deprecated
api.add_resource(ProviderTokenApi, '/workspaces/current/providers/<provider>/token',
endpoint='workspaces_current_providers_token') # PUT for updating provider token
api.add_resource(ProviderTokenValidateApi, '/workspaces/current/providers/<provider>/token-validate',
endpoint='workspaces_current_providers_token_validate') # POST for validating provider token
api.add_resource(ProviderListApi, '/workspaces/current/providers') # GET for getting providers list
api.add_resource(ProviderSystemApi, '/workspaces/current/providers/<provider>/system',
endpoint='workspaces_current_providers_system') # GET for getting provider quota, PUT for updating provider status
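The obfuscation rule described in ProviderListApi's docstring (first 6 and last 2 characters in plaintext, the middle masked) lives in ProviderService; this is a standalone sketch of just that rule, not the service's actual implementation:

def obfuscate_key(key: str) -> str:
    """Keep the first 6 and last 2 characters, mask the middle with '*'."""
    if len(key) <= 8:
        return '*' * len(key)  # too short to reveal anything safely
    return key[:6] + '*' * (len(key) - 8) + key[-2:]

print(obfuscate_key("sk-abcdefghijklmnop"))  # -> sk-abc***********op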

View File

@ -0,0 +1,97 @@
# -*- coding:utf-8 -*-
import logging
from flask import request
from flask_login import login_required, current_user
from flask_restful import Resource, fields, marshal_with, reqparse, marshal
from controllers.console import api
from controllers.console.setup import setup_required
from controllers.console.error import AccountNotLinkTenantError
from controllers.console.wraps import account_initialization_required
from libs.helper import TimestampField
from extensions.ext_database import db
from models.account import Tenant
from services.account_service import TenantService
from services.workspace_service import WorkspaceService
provider_fields = {
'provider_name': fields.String,
'provider_type': fields.String,
'is_valid': fields.Boolean,
'token_is_set': fields.Boolean,
}
tenant_fields = {
'id': fields.String,
'name': fields.String,
'plan': fields.String,
'status': fields.String,
'created_at': TimestampField,
'role': fields.String,
'providers': fields.List(fields.Nested(provider_fields)),
'in_trail': fields.Boolean,
'trial_end_reason': fields.String,
}
tenants_fields = {
'id': fields.String,
'name': fields.String,
'plan': fields.String,
'status': fields.String,
'created_at': TimestampField,
'current': fields.Boolean
}
class TenantListApi(Resource):
@setup_required
@login_required
@account_initialization_required
def get(self):
tenants = TenantService.get_join_tenants(current_user)
for tenant in tenants:
if tenant.id == current_user.current_tenant_id:
tenant.current = True # Set current=True for current tenant
return {'workspaces': marshal(tenants, tenants_fields)}, 200
class TenantApi(Resource):
@setup_required
@login_required
@account_initialization_required
@marshal_with(tenant_fields)
def get(self):
if request.path.endswith('/info'):  # the blueprint prefix means the path is never exactly '/info'
logging.warning('Deprecated URL /info was used.')
tenant = current_user.current_tenant
return WorkspaceService.get_tenant_info(tenant), 200
class SwitchWorkspaceApi(Resource):
@setup_required
@login_required
@account_initialization_required
def post(self):
parser = reqparse.RequestParser()
parser.add_argument('tenant_id', type=str, required=True, location='json')
args = parser.parse_args()
# check if tenant_id is valid, 403 if not
try:
TenantService.switch_tenant(current_user, args['tenant_id'])
except Exception:
raise AccountNotLinkTenantError("Account not link tenant")
new_tenant = db.session.query(Tenant).get(args['tenant_id']) # Get new tenant
return {'result': 'success', 'new_tenant': marshal(WorkspaceService.get_tenant_info(new_tenant), tenant_fields)}
api.add_resource(TenantListApi, '/workspaces') # GET for getting all tenants
api.add_resource(TenantApi, '/workspaces/current', endpoint='workspaces_current') # GET for getting current tenant info
api.add_resource(TenantApi, '/info', endpoint='info') # Deprecated
api.add_resource(SwitchWorkspaceApi, '/workspaces/switch') # POST for switching tenant
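A sketch of listing workspaces and switching to another one, under the same console-prefix and session assumptions:

import requests

BASE = "http://localhost:5001/console/api"  # hypothetical host
session = requests.Session()
session.cookies.set("session", "<your-session-cookie>")

workspaces = session.get(f"{BASE}/workspaces").json()["workspaces"]
target = next(w for w in workspaces if not w["current"])
resp = session.post(f"{BASE}/workspaces/switch", json={"tenant_id": target["id"]})
print(resp.json()["new_tenant"]["name"])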

View File

@ -0,0 +1,43 @@
# -*- coding:utf-8 -*-
from functools import wraps
from flask import current_app, abort
from flask_login import current_user
from controllers.console.workspace.error import AccountNotInitializedError
def account_initialization_required(view):
@wraps(view)
def decorated(*args, **kwargs):
# check account initialization
account = current_user
if account.status == 'uninitialized':
raise AccountNotInitializedError()
return view(*args, **kwargs)
return decorated
def only_edition_cloud(view):
@wraps(view)
def decorated(*args, **kwargs):
if current_app.config['EDITION'] != 'CLOUD':
abort(404)
return view(*args, **kwargs)
return decorated
def only_edition_self_hosted(view):
@wraps(view)
def decorated(*args, **kwargs):
if current_app.config['EDITION'] != 'SELF_HOSTED':
abort(404)
return view(*args, **kwargs)
return decorated

View File

@ -0,0 +1,12 @@
# -*- coding:utf-8 -*-
from flask import Blueprint
from libs.external_api import ExternalApi
bp = Blueprint('service_api', __name__, url_prefix='/v1')
api = ExternalApi(bp)
from .app import completion, app, conversation, message
from .dataset import document

View File

@ -0,0 +1,27 @@
from extensions.ext_database import db
from models.model import EndUser
def create_or_update_end_user_for_user_id(app_model, user_id):
"""
Create or update an end user record (service API session) for the given user ID.
"""
end_user = db.session.query(EndUser) \
.filter(
EndUser.tenant_id == app_model.tenant_id,
EndUser.session_id == user_id,
EndUser.type == 'service_api'
).first()
if end_user is None:
end_user = EndUser(
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type='service_api',
is_anonymous=True,
session_id=user_id
)
db.session.add(end_user)
db.session.commit()
return end_user

View File

@ -0,0 +1,43 @@
# -*- coding:utf-8 -*-
from flask_restful import fields, marshal_with
from controllers.service_api import api
from controllers.service_api.wraps import AppApiResource
class AppParameterApi(AppApiResource):
"""Resource for app variables."""
variable_fields = {
'key': fields.String,
'name': fields.String,
'description': fields.String,
'type': fields.String,
'default': fields.String,
'max_length': fields.Integer,
'options': fields.List(fields.String)
}
parameters_fields = {
'opening_statement': fields.String,
'suggested_questions': fields.Raw,
'suggested_questions_after_answer': fields.Raw,
'more_like_this': fields.Raw,
'user_input_form': fields.Raw,
}
@marshal_with(parameters_fields)
def get(self, app_model, end_user):
"""Retrieve app parameters."""
app_model_config = app_model.app_model_config
return {
'opening_statement': app_model_config.opening_statement,
'suggested_questions': app_model_config.suggested_questions_list,
'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
'more_like_this': app_model_config.more_like_this_dict,
'user_input_form': app_model_config.user_input_form_list
}
api.add_resource(AppParameterApi, '/parameters')

View File

@ -0,0 +1,182 @@
import json
import logging
from typing import Union, Generator
from flask import stream_with_context, Response
from flask_restful import reqparse
from werkzeug.exceptions import NotFound, InternalServerError
import services
from controllers.service_api import api
from controllers.service_api.app import create_or_update_end_user_for_user_id
from controllers.service_api.app.error import AppUnavailableError, ProviderNotInitializeError, NotChatAppError, \
ConversationCompletedError, CompletionRequestError, ProviderQuotaExceededError, \
ProviderModelCurrentlyNotSupportError
from controllers.service_api.wraps import AppApiResource
from core.conversation_message_task import PubHandler
from core.llm.error import LLMBadRequestError, LLMAuthorizationError, LLMAPIUnavailableError, LLMAPIConnectionError, \
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value
from services.completion_service import CompletionService
class CompletionApi(AppApiResource):
def post(self, app_model, end_user):
if app_model.mode != 'completion':
raise AppUnavailableError()
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, location='json')
parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
parser.add_argument('user', type=str, location='json')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
response = CompletionService.completion(
app_model=app_model,
user=end_user,
args=args,
from_source='api',
streaming=streaming
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
class CompletionStopApi(AppApiResource):
def post(self, app_model, end_user, task_id):
if app_model.mode != 'completion':
raise AppUnavailableError()
PubHandler.stop(end_user, task_id)
return {'result': 'success'}, 200
class ChatApi(AppApiResource):
def post(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, required=True, location='json')
parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
parser.add_argument('conversation_id', type=uuid_value, location='json')
parser.add_argument('user', type=str, location='json')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
response = CompletionService.completion(
app_model=app_model,
user=end_user,
args=args,
from_source='api',
streaming=streaming
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
class ChatStopApi(AppApiResource):
def post(self, app_model, end_user, task_id):
if app_model.mode != 'chat':
raise NotChatAppError()
PubHandler.stop(end_user, task_id)
return {'result': 'success'}, 200
def compact_response(response: Union[dict, Generator]) -> Response:
if isinstance(response, dict):
return Response(response=json.dumps(response), status=200, mimetype='application/json')
else:
def generate() -> Generator:
try:
for chunk in response:
yield chunk
except services.errors.conversation.ConversationNotExistsError:
yield "data: " + json.dumps(api.handle_error(NotFound("Conversation Not Exists.")).get_json()) + "\n\n"
except services.errors.conversation.ConversationCompletedError:
yield "data: " + json.dumps(api.handle_error(ConversationCompletedError()).get_json()) + "\n\n"
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
yield "data: " + json.dumps(api.handle_error(AppUnavailableError()).get_json()) + "\n\n"
except ProviderTokenNotInitError:
yield "data: " + json.dumps(api.handle_error(ProviderNotInitializeError()).get_json()) + "\n\n"
except QuotaExceededError:
yield "data: " + json.dumps(api.handle_error(ProviderQuotaExceededError()).get_json()) + "\n\n"
except ModelCurrentlyNotSupportError:
yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
except ValueError as e:
yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
except Exception:
logging.exception("internal server error.")
yield "data: " + json.dumps(api.handle_error(InternalServerError()).get_json()) + "\n\n"
return Response(stream_with_context(generate()), status=200,
mimetype='text/event-stream')
api.add_resource(CompletionApi, '/completion-messages')
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
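A client sketch for these service API routes; the blueprint is mounted at /v1 and authenticated with a Bearer app token (see controllers/service_api/wraps.py later in this commit). Host and token are placeholders:

import requests

BASE = "http://localhost:5001/v1"  # hypothetical host; service_api blueprint prefix is /v1
headers = {"Authorization": "Bearer <app-api-token>"}

# blocking mode returns a single JSON body
resp = requests.post(f"{BASE}/chat-messages", headers=headers, json={
    "inputs": {},
    "query": "Hello!",
    "response_mode": "blocking",
    "user": "end-user-123",   # creates/reuses an EndUser session for this id
})
print(resp.json())

# streaming mode returns text/event-stream; each event is a "data: {...}" line
with requests.post(f"{BASE}/chat-messages", headers=headers, stream=True, json={
    "inputs": {}, "query": "Hello again!", "response_mode": "streaming", "user": "end-user-123",
}) as stream:
    for line in stream.iter_lines():
        if line:
            print(line.decode("utf-8"))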

View File

@ -0,0 +1,76 @@
# -*- coding:utf-8 -*-
from flask_restful import fields, marshal_with, reqparse
from flask_restful.inputs import int_range
from werkzeug.exceptions import NotFound
from controllers.service_api import api
from controllers.service_api.app import create_or_update_end_user_for_user_id
from controllers.service_api.app.error import NotChatAppError
from controllers.service_api.wraps import AppApiResource
from libs.helper import TimestampField, uuid_value
import services
from services.conversation_service import ConversationService
conversation_fields = {
'id': fields.String,
'name': fields.String,
'inputs': fields.Raw,
'status': fields.String,
'introduction': fields.String,
'created_at': TimestampField
}
conversation_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(conversation_fields))
}
class ConversationApi(AppApiResource):
@marshal_with(conversation_infinite_scroll_pagination_fields)
def get(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('last_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
parser.add_argument('user', type=str, location='args')
args = parser.parse_args()
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
return ConversationService.pagination_by_last_id(app_model, end_user, args['last_id'], args['limit'])
except services.errors.conversation.LastConversationNotExistsError:
raise NotFound("Last Conversation Not Exists.")
class ConversationRenameApi(AppApiResource):
@marshal_with(conversation_fields)
def post(self, app_model, end_user, c_id):
if app_model.mode != 'chat':
raise NotChatAppError()
conversation_id = str(c_id)
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, location='json')
parser.add_argument('user', type=str, location='json')
args = parser.parse_args()
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
return ConversationService.rename(app_model, conversation_id, end_user, args['name'])
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
api.add_resource(ConversationRenameApi, '/conversations/<uuid:c_id>/name', endpoint='conversation_name')
api.add_resource(ConversationApi, '/conversations')

View File

@ -0,0 +1,51 @@
# -*- coding:utf-8 -*-
from libs.exception import BaseHTTPException
class AppUnavailableError(BaseHTTPException):
error_code = 'app_unavailable'
description = "App unavailable."
code = 400
class NotCompletionAppError(BaseHTTPException):
error_code = 'not_completion_app'
description = "Not Completion App"
code = 400
class NotChatAppError(BaseHTTPException):
error_code = 'not_chat_app'
description = "Not Chat App"
code = 400
class ConversationCompletedError(BaseHTTPException):
error_code = 'conversation_completed'
description = "Conversation Completed."
code = 400
class ProviderNotInitializeError(BaseHTTPException):
error_code = 'provider_not_initialize'
description = "Provider Token not initialize."
code = 400
class ProviderQuotaExceededError(BaseHTTPException):
error_code = 'provider_quota_exceeded'
description = "Provider quota exceeded."
code = 400
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
error_code = 'model_currently_not_support'
description = "GPT-4 currently not support."
code = 400
class CompletionRequestError(BaseHTTPException):
error_code = 'completion_request_error'
description = "Completion request failed."
code = 400

View File

@ -0,0 +1,81 @@
# -*- coding:utf-8 -*-
from flask_restful import fields, marshal_with, reqparse
from flask_restful.inputs import int_range
from werkzeug.exceptions import NotFound
import services
from controllers.service_api import api
from controllers.service_api.app import create_or_update_end_user_for_user_id
from controllers.service_api.app.error import NotChatAppError
from controllers.service_api.wraps import AppApiResource
from libs.helper import TimestampField, uuid_value
from services.message_service import MessageService
class MessageListApi(AppApiResource):
feedback_fields = {
'rating': fields.String
}
message_fields = {
'id': fields.String,
'conversation_id': fields.String,
'inputs': fields.Raw,
'query': fields.String,
'answer': fields.String,
'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
'created_at': TimestampField
}
message_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(message_fields))
}
@marshal_with(message_infinite_scroll_pagination_fields)
def get(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('conversation_id', required=True, type=uuid_value, location='args')
parser.add_argument('first_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
parser.add_argument('user', type=str, location='args')
args = parser.parse_args()
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
return MessageService.pagination_by_first_id(app_model, end_user,
args['conversation_id'], args['first_id'], args['limit'])
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.message.FirstMessageNotExistsError:
raise NotFound("First Message Not Exists.")
class MessageFeedbackApi(AppApiResource):
def post(self, app_model, end_user, message_id):
message_id = str(message_id)
parser = reqparse.RequestParser()
parser.add_argument('rating', type=str, choices=['like', 'dislike', None], location='json')
parser.add_argument('user', type=str, location='json')
args = parser.parse_args()
if end_user is None and args['user'] is not None:
end_user = create_or_update_end_user_for_user_id(app_model, args['user'])
try:
MessageService.create_feedback(app_model, message_id, end_user, args['rating'])
except services.errors.message.MessageNotExistsError:
raise NotFound("Message Not Exists.")
return {'result': 'success'}
api.add_resource(MessageListApi, '/messages')
api.add_resource(MessageFeedbackApi, '/messages/<uuid:message_id>/feedbacks')
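Fetching message history might look like this sketch; the exact cursor semantics belong to MessageService.pagination_by_first_id, so the first_id note is an assumption:

import requests

BASE = "http://localhost:5001/v1"  # hypothetical host
headers = {"Authorization": "Bearer <app-api-token>"}

page = requests.get(f"{BASE}/messages", headers=headers, params={
    "conversation_id": "<conversation-uuid>",
    "limit": 20,                 # int_range(1, 100)
    "user": "end-user-123",
}).json()
for m in page["data"]:
    print(m["query"], "->", (m["answer"] or "")[:60])
# if page["has_more"], pass an earlier message id as first_id to fetch the next page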

View File

@ -0,0 +1,129 @@
import datetime
import uuid
from flask import current_app
from flask_restful import reqparse
from werkzeug.exceptions import NotFound
import services.dataset_service
from controllers.service_api import api
from controllers.service_api.app.error import ProviderNotInitializeError
from controllers.service_api.dataset.error import ArchivedDocumentImmutableError, DocumentIndexingError, \
DatasetNotInitedError
from controllers.service_api.wraps import DatasetApiResource
from core.llm.error import ProviderTokenNotInitError
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.model import UploadFile
from services.dataset_service import DocumentService
class DocumentListApi(DatasetApiResource):
"""Resource for documents."""
def post(self, dataset):
"""Create document."""
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, nullable=False, location='json')
parser.add_argument('text', type=str, required=True, nullable=False, location='json')
parser.add_argument('doc_type', type=str, location='json')
parser.add_argument('doc_metadata', type=dict, location='json')
args = parser.parse_args()
if not dataset.indexing_technique:
raise DatasetNotInitedError("Dataset indexing technique must be set.")
doc_type = args.get('doc_type')
doc_metadata = args.get('doc_metadata')
if doc_type and doc_type not in DocumentService.DOCUMENT_METADATA_SCHEMA:
raise ValueError('Invalid doc_type.')
# use a uuid as the file name
file_uuid = str(uuid.uuid4())
file_key = 'upload_files/' + dataset.tenant_id + '/' + file_uuid + '.txt'
# save file to storage
storage.save(file_key, args.get('text'))
# save file to db
config = current_app.config
upload_file = UploadFile(
tenant_id=dataset.tenant_id,
storage_type=config['STORAGE_TYPE'],
key=file_key,
name=args.get('name') + '.txt',
size=len(args.get('text')),
extension='txt',
mime_type='text/plain',
created_by=dataset.created_by,
created_at=datetime.datetime.utcnow(),
used=True,
used_by=dataset.created_by,
used_at=datetime.datetime.utcnow()
)
db.session.add(upload_file)
db.session.commit()
document_data = {
'data_source': {
'type': 'upload_file',
'info': upload_file.id
}
}
try:
document = DocumentService.save_document_with_dataset_id(
dataset=dataset,
document_data=document_data,
account=dataset.created_by_account,
dataset_process_rule=dataset.latest_process_rule,
created_from='api'
)
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
if doc_type and doc_metadata:
metadata_schema = DocumentService.DOCUMENT_METADATA_SCHEMA[doc_type]
document.doc_metadata = {}
for key, value_type in metadata_schema.items():
value = doc_metadata.get(key)
if value is not None and isinstance(value, value_type):
document.doc_metadata[key] = value
document.doc_type = doc_type
document.updated_at = datetime.datetime.utcnow()
db.session.commit()
return {'id': document.id}
class DocumentApi(DatasetApiResource):
def delete(self, dataset, document_id):
"""Delete document."""
document_id = str(document_id)
document = DocumentService.get_document(dataset.id, document_id)
# 404 if document not found
if document is None:
raise NotFound("Document Not Exists.")
# 403 if document is archived
if DocumentService.check_archived(document):
raise ArchivedDocumentImmutableError()
try:
# delete document
DocumentService.delete_document(document)
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError('Cannot delete document during indexing.')
return {'result': 'success'}, 204
api.add_resource(DocumentListApi, '/documents')
api.add_resource(DocumentApi, '/documents/<uuid:document_id>')
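A sketch of creating and deleting a document through the dataset-scoped service API; the token here must be a dataset API token, not an app token, and host/values are placeholders:

import requests

BASE = "http://localhost:5001/v1"  # hypothetical host
headers = {"Authorization": "Bearer <dataset-api-token>"}

resp = requests.post(f"{BASE}/documents", headers=headers, json={
    "name": "faq",              # stored as 'faq.txt'
    "text": "Q: ...\nA: ...",
})
document_id = resp.json()["id"]

# deleting returns 204; 403 if the document is archived or still indexing
requests.delete(f"{BASE}/documents/{document_id}", headers=headers)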

View File

@ -0,0 +1,20 @@
# -*- coding:utf-8 -*-
from libs.exception import BaseHTTPException
class ArchivedDocumentImmutableError(BaseHTTPException):
error_code = 'archived_document_immutable'
description = "Cannot operate when document was archived."
code = 403
class DocumentIndexingError(BaseHTTPException):
error_code = 'document_indexing'
description = "Cannot operate document during indexing."
code = 403
class DatasetNotInitedError(BaseHTTPException):
error_code = 'dataset_not_inited'
description = "Dataset not inited."
code = 403

View File

@ -0,0 +1,95 @@
# -*- coding:utf-8 -*-
from datetime import datetime
from functools import wraps
from flask import request
from flask_restful import Resource
from werkzeug.exceptions import NotFound, Unauthorized
from extensions.ext_database import db
from models.dataset import Dataset
from models.model import ApiToken, App
def validate_app_token(view=None):
def decorator(view):
@wraps(view)
def decorated(*args, **kwargs):
api_token = validate_and_get_api_token('app')
app_model = db.session.query(App).get(api_token.app_id)
if not app_model:
raise NotFound()
if app_model.status != 'normal':
raise NotFound()
if not app_model.enable_api:
raise NotFound()
return view(app_model, None, *args, **kwargs)
return decorated
if view:
return decorator(view)
# if view is None, it means that the decorator is used without parentheses
# use the decorator as a function for method_decorators
return decorator
def validate_dataset_token(view=None):
def decorator(view):
@wraps(view)
def decorated(*args, **kwargs):
api_token = validate_and_get_api_token('dataset')
dataset = db.session.query(Dataset).get(api_token.dataset_id)
if not dataset:
raise NotFound()
return view(dataset, *args, **kwargs)
return decorated
if view:
return decorator(view)
# if view is None, it means that the decorator is used without parentheses
# use the decorator as a function for method_decorators
return decorator
def validate_and_get_api_token(scope=None):
"""
Validate and get API token.
"""
auth_header = request.headers.get('Authorization')
if auth_header is None or ' ' not in auth_header:
raise Unauthorized()
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != 'bearer':
raise Unauthorized()
api_token = db.session.query(ApiToken).filter(
ApiToken.token == auth_token,
ApiToken.type == scope,
).first()
if not api_token:
raise Unauthorized()
api_token.last_used_at = datetime.utcnow()
db.session.commit()
return api_token
class AppApiResource(Resource):
method_decorators = [validate_app_token]
class DatasetApiResource(Resource):
method_decorators = [validate_dataset_token]
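A note on how these pieces compose: Flask-RESTful applies method_decorators to every bound handler, and validate_app_token calls the wrapped view with the resolved models prepended, which is why the handlers above are declared as post(self, app_model, end_user). An illustrative (not shipped) resource:

from controllers.service_api.wraps import AppApiResource

class EchoApi(AppApiResource):
    # app_model is injected by validate_app_token; end_user arrives as None
    # unless the handler resolves a 'user' id itself
    def get(self, app_model, end_user):
        return {'app_id': app_model.id, 'end_user_id': end_user.id if end_user else None}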

View File

@ -0,0 +1,10 @@
# -*- coding:utf-8 -*-
from flask import Blueprint
from libs.external_api import ExternalApi
bp = Blueprint('web', __name__, url_prefix='/api')
api = ExternalApi(bp)
from . import completion, app, conversation, message, site, saved_message

View File

@ -0,0 +1,42 @@
# -*- coding:utf-8 -*-
from flask_restful import marshal_with, fields
from controllers.web import api
from controllers.web.wraps import WebApiResource
class AppParameterApi(WebApiResource):
"""Resource for app variables."""
variable_fields = {
'key': fields.String,
'name': fields.String,
'description': fields.String,
'type': fields.String,
'default': fields.String,
'max_length': fields.Integer,
'options': fields.List(fields.String)
}
parameters_fields = {
'opening_statement': fields.String,
'suggested_questions': fields.Raw,
'suggested_questions_after_answer': fields.Raw,
'more_like_this': fields.Raw,
'user_input_form': fields.Raw,
}
@marshal_with(parameters_fields)
def get(self, app_model, end_user):
"""Retrieve app parameters."""
app_model_config = app_model.app_model_config
return {
'opening_statement': app_model_config.opening_statement,
'suggested_questions': app_model_config.suggested_questions_list,
'suggested_questions_after_answer': app_model_config.suggested_questions_after_answer_dict,
'more_like_this': app_model_config.more_like_this_dict,
'user_input_form': app_model_config.user_input_form_list
}
api.add_resource(AppParameterApi, '/parameters')

View File

@ -0,0 +1,175 @@
# -*- coding:utf-8 -*-
import json
import logging
from typing import Generator, Union
from flask import Response, stream_with_context
from flask_restful import reqparse
from werkzeug.exceptions import InternalServerError, NotFound
import services
from controllers.web import api
from controllers.web.error import AppUnavailableError, ConversationCompletedError, \
ProviderNotInitializeError, NotChatAppError, NotCompletionAppError, CompletionRequestError, \
ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
from controllers.web.wraps import WebApiResource
from core.conversation_message_task import PubHandler
from core.llm.error import LLMBadRequestError, LLMAPIUnavailableError, LLMAuthorizationError, LLMAPIConnectionError, \
LLMRateLimitError, ProviderTokenNotInitError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value
from services.completion_service import CompletionService
# define completion api for user
class CompletionApi(WebApiResource):
def post(self, app_model, end_user):
if app_model.mode != 'completion':
raise NotCompletionAppError()
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, location='json')
parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
try:
response = CompletionService.completion(
app_model=app_model,
user=end_user,
args=args,
from_source='api',
streaming=streaming
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
class CompletionStopApi(WebApiResource):
def post(self, app_model, end_user, task_id):
if app_model.mode != 'completion':
raise NotCompletionAppError()
PubHandler.stop(end_user, task_id)
return {'result': 'success'}, 200
class ChatApi(WebApiResource):
def post(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('inputs', type=dict, required=True, location='json')
parser.add_argument('query', type=str, required=True, location='json')
parser.add_argument('response_mode', type=str, choices=['blocking', 'streaming'], location='json')
parser.add_argument('conversation_id', type=uuid_value, location='json')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
try:
response = CompletionService.completion(
app_model=app_model,
user=end_user,
args=args,
from_source='api',
streaming=streaming
)
return compact_response(response)
except services.errors.conversation.ConversationNotExistsError:
raise NotFound("Conversation Not Exists.")
except services.errors.conversation.ConversationCompletedError:
raise ConversationCompletedError()
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
raise AppUnavailableError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception as e:
logging.exception("internal server error.")
raise InternalServerError()
class ChatStopApi(WebApiResource):
def post(self, app_model, end_user, task_id):
if app_model.mode != 'chat':
raise NotChatAppError()
PubHandler.stop(end_user, task_id)
return {'result': 'success'}, 200
def compact_response(response: Union[dict, Generator]) -> Response:
if isinstance(response, dict):
return Response(response=json.dumps(response), status=200, mimetype='application/json')
else:
def generate() -> Generator:
try:
for chunk in response:
yield chunk
except services.errors.conversation.ConversationNotExistsError:
yield "data: " + json.dumps(api.handle_error(NotFound("Conversation Not Exists.")).get_json()) + "\n\n"
except services.errors.conversation.ConversationCompletedError:
yield "data: " + json.dumps(api.handle_error(ConversationCompletedError()).get_json()) + "\n\n"
except services.errors.app_model_config.AppModelConfigBrokenError:
logging.exception("App model config broken.")
yield "data: " + json.dumps(api.handle_error(AppUnavailableError()).get_json()) + "\n\n"
except ProviderTokenNotInitError:
yield "data: " + json.dumps(api.handle_error(ProviderNotInitializeError()).get_json()) + "\n\n"
except QuotaExceededError:
yield "data: " + json.dumps(api.handle_error(ProviderQuotaExceededError()).get_json()) + "\n\n"
except ModelCurrentlyNotSupportError:
yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
except ValueError as e:
yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
except Exception:
logging.exception("internal server error.")
yield "data: " + json.dumps(api.handle_error(InternalServerError()).get_json()) + "\n\n"
return Response(stream_with_context(generate()), status=200,
mimetype='text/event-stream')
api.add_resource(CompletionApi, '/completion-messages')
api.add_resource(CompletionStopApi, '/completion-messages/<string:task_id>/stop')
api.add_resource(ChatApi, '/chat-messages')
api.add_resource(ChatStopApi, '/chat-messages/<string:task_id>/stop')
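A minimal client sketch (not part of the commit) showing how the two response modes of the endpoints above behave. The base URL and bearer token are illustrative assumptions; the token is a Site code, validated in wraps.py further below.

import json
import requests  # assumed available in the client environment

BASE = "http://localhost:5001/api"  # assumed mount point of the web blueprint
HEADERS = {"Authorization": "Bearer <site-code>"}  # placeholder Site code

# Blocking mode: the endpoint returns a single JSON document.
resp = requests.post(f"{BASE}/completion-messages", headers=HEADERS,
                     json={"inputs": {}, "query": "Hello", "response_mode": "blocking"})
print(resp.json())

# Streaming mode: text/event-stream, one "data: {...}" line per chunk.
with requests.post(f"{BASE}/completion-messages", headers=HEADERS,
                   json={"inputs": {}, "query": "Hello", "response_mode": "streaming"},
                   stream=True) as stream:
    for line in stream.iter_lines():
        if line.startswith(b"data: "):
            print(json.loads(line[len(b"data: "):]))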


@ -0,0 +1,121 @@
# -*- coding:utf-8 -*-
from flask_restful import fields, reqparse, marshal_with
from flask_restful.inputs import int_range
from werkzeug.exceptions import NotFound
from controllers.web import api
from controllers.web.error import NotChatAppError
from controllers.web.wraps import WebApiResource
from libs.helper import TimestampField, uuid_value
from services.conversation_service import ConversationService
from services.errors.conversation import LastConversationNotExistsError, ConversationNotExistsError
from services.web_conversation_service import WebConversationService
conversation_fields = {
'id': fields.String,
'name': fields.String,
'inputs': fields.Raw,
'status': fields.String,
'introduction': fields.String,
'created_at': TimestampField
}
conversation_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(conversation_fields))
}
class ConversationListApi(WebApiResource):
@marshal_with(conversation_infinite_scroll_pagination_fields)
def get(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('last_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
parser.add_argument('pinned', type=str, choices=['true', 'false', None], location='args')
args = parser.parse_args()
pinned = None
if 'pinned' in args and args['pinned'] is not None:
            pinned = args['pinned'] == 'true'
try:
return WebConversationService.pagination_by_last_id(
app_model=app_model,
end_user=end_user,
last_id=args['last_id'],
limit=args['limit'],
pinned=pinned
)
except LastConversationNotExistsError:
            raise NotFound("Last conversation does not exist.")
class ConversationApi(WebApiResource):
def delete(self, app_model, end_user, c_id):
if app_model.mode != 'chat':
raise NotChatAppError()
conversation_id = str(c_id)
ConversationService.delete(app_model, conversation_id, end_user)
WebConversationService.unpin(app_model, conversation_id, end_user)
return {"result": "success"}, 204
class ConversationRenameApi(WebApiResource):
@marshal_with(conversation_fields)
def post(self, app_model, end_user, c_id):
if app_model.mode != 'chat':
raise NotChatAppError()
conversation_id = str(c_id)
parser = reqparse.RequestParser()
parser.add_argument('name', type=str, required=True, location='json')
args = parser.parse_args()
try:
return ConversationService.rename(app_model, conversation_id, end_user, args['name'])
except ConversationNotExistsError:
            raise NotFound("Conversation does not exist.")
class ConversationPinApi(WebApiResource):
def patch(self, app_model, end_user, c_id):
if app_model.mode != 'chat':
raise NotChatAppError()
conversation_id = str(c_id)
try:
WebConversationService.pin(app_model, conversation_id, end_user)
except ConversationNotExistsError:
            raise NotFound("Conversation does not exist.")
return {"result": "success"}
class ConversationUnPinApi(WebApiResource):
def patch(self, app_model, end_user, c_id):
if app_model.mode != 'chat':
raise NotChatAppError()
conversation_id = str(c_id)
WebConversationService.unpin(app_model, conversation_id, end_user)
return {"result": "success"}
api.add_resource(ConversationRenameApi, '/conversations/<uuid:c_id>/name', endpoint='web_conversation_name')
api.add_resource(ConversationListApi, '/conversations')
api.add_resource(ConversationApi, '/conversations/<uuid:c_id>')
api.add_resource(ConversationPinApi, '/conversations/<uuid:c_id>/pin')
api.add_resource(ConversationUnPinApi, '/conversations/<uuid:c_id>/unpin')
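ConversationListApi pages by last_id rather than by offset; a client walks the full list like this (sketch, same assumed base URL and placeholder token as the completion example above):

import requests

BASE = "http://localhost:5001/api"  # assumed mount point
HEADERS = {"Authorization": "Bearer <site-code>"}  # placeholder Site code

last_id = None
while True:
    params = {"limit": 20}
    if last_id:
        params["last_id"] = last_id
    page = requests.get(f"{BASE}/conversations", headers=HEADERS, params=params).json()
    for conversation in page["data"]:
        print(conversation["id"], conversation["name"])
    if not page["has_more"]:
        break
    last_id = page["data"][-1]["id"]  # cursor for the next page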


@ -0,0 +1,62 @@
# -*- coding:utf-8 -*-
from libs.exception import BaseHTTPException
class AppUnavailableError(BaseHTTPException):
error_code = 'app_unavailable'
description = "App unavailable."
code = 400
class NotCompletionAppError(BaseHTTPException):
error_code = 'not_completion_app'
description = "Not Completion App"
code = 400
class NotChatAppError(BaseHTTPException):
error_code = 'not_chat_app'
description = "Not Chat App"
code = 400
class ConversationCompletedError(BaseHTTPException):
error_code = 'conversation_completed'
description = "Conversation Completed."
code = 400
class ProviderNotInitializeError(BaseHTTPException):
error_code = 'provider_not_initialize'
description = "Provider Token not initialize."
code = 400
class ProviderQuotaExceededError(BaseHTTPException):
error_code = 'provider_quota_exceeded'
description = "Provider quota exceeded."
code = 400
class ProviderModelCurrentlyNotSupportError(BaseHTTPException):
error_code = 'model_currently_not_support'
description = "GPT-4 currently not support."
code = 400
class CompletionRequestError(BaseHTTPException):
error_code = 'completion_request_error'
description = "Completion request failed."
code = 400
class AppMoreLikeThisDisabledError(BaseHTTPException):
error_code = 'app_more_like_this_disabled'
description = "More like this disabled."
code = 403
class AppSuggestedQuestionsAfterAnswerDisabledError(BaseHTTPException):
error_code = 'app_suggested_questions_after_answer_disabled'
description = "Function Suggested questions after answer disabled."
code = 403
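Each error above follows the same three-field pattern; BaseHTTPException turns error_code, description, and code into the JSON body and HTTP status of the response. A hypothetical new error (not in the commit) would look like this:

class AppRateLimitedError(BaseHTTPException):
    error_code = 'app_rate_limited'
    description = "App requests are rate limited."
    code = 429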


@ -0,0 +1,189 @@
# -*- coding:utf-8 -*-
import json
import logging
from typing import Generator, Union
from flask import stream_with_context, Response
from flask_restful import reqparse, fields, marshal_with
from flask_restful.inputs import int_range
from werkzeug.exceptions import NotFound, InternalServerError
import services
from controllers.web import api
from controllers.web.error import NotChatAppError, CompletionRequestError, ProviderNotInitializeError, \
AppMoreLikeThisDisabledError, NotCompletionAppError, AppSuggestedQuestionsAfterAnswerDisabledError, \
ProviderQuotaExceededError, ProviderModelCurrentlyNotSupportError
from controllers.web.wraps import WebApiResource
from core.llm.error import LLMRateLimitError, LLMBadRequestError, LLMAuthorizationError, LLMAPIConnectionError, \
ProviderTokenNotInitError, LLMAPIUnavailableError, QuotaExceededError, ModelCurrentlyNotSupportError
from libs.helper import uuid_value, TimestampField
from services.completion_service import CompletionService
from services.errors.app import MoreLikeThisDisabledError
from services.errors.conversation import ConversationNotExistsError
from services.errors.message import MessageNotExistsError, SuggestedQuestionsAfterAnswerDisabledError
from services.message_service import MessageService
class MessageListApi(WebApiResource):
feedback_fields = {
'rating': fields.String
}
message_fields = {
'id': fields.String,
'conversation_id': fields.String,
'inputs': fields.Raw,
'query': fields.String,
'answer': fields.String,
'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
'created_at': TimestampField
}
message_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(message_fields))
}
@marshal_with(message_infinite_scroll_pagination_fields)
def get(self, app_model, end_user):
if app_model.mode != 'chat':
raise NotChatAppError()
parser = reqparse.RequestParser()
parser.add_argument('conversation_id', required=True, type=uuid_value, location='args')
parser.add_argument('first_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
args = parser.parse_args()
try:
return MessageService.pagination_by_first_id(app_model, end_user,
args['conversation_id'], args['first_id'], args['limit'])
except services.errors.conversation.ConversationNotExistsError:
            raise NotFound("Conversation does not exist.")
except services.errors.message.FirstMessageNotExistsError:
            raise NotFound("First message does not exist.")
class MessageFeedbackApi(WebApiResource):
def post(self, app_model, end_user, message_id):
message_id = str(message_id)
parser = reqparse.RequestParser()
parser.add_argument('rating', type=str, choices=['like', 'dislike', None], location='json')
args = parser.parse_args()
try:
MessageService.create_feedback(app_model, message_id, end_user, args['rating'])
except services.errors.message.MessageNotExistsError:
            raise NotFound("Message does not exist.")
return {'result': 'success'}
class MessageMoreLikeThisApi(WebApiResource):
def get(self, app_model, end_user, message_id):
if app_model.mode != 'completion':
raise NotCompletionAppError()
message_id = str(message_id)
parser = reqparse.RequestParser()
parser.add_argument('response_mode', type=str, required=True, choices=['blocking', 'streaming'], location='args')
args = parser.parse_args()
streaming = args['response_mode'] == 'streaming'
try:
response = CompletionService.generate_more_like_this(app_model, end_user, message_id, streaming)
return compact_response(response)
except MessageNotExistsError:
            raise NotFound("Message does not exist.")
except MoreLikeThisDisabledError:
raise AppMoreLikeThisDisabledError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except ValueError as e:
raise e
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
def compact_response(response: Union[dict, Generator]) -> Response:
if isinstance(response, dict):
return Response(response=json.dumps(response), status=200, mimetype='application/json')
else:
def generate() -> Generator:
try:
for chunk in response:
yield chunk
except MessageNotExistsError:
yield "data: " + json.dumps(api.handle_error(NotFound("Message Not Exists.")).get_json()) + "\n\n"
except MoreLikeThisDisabledError:
yield "data: " + json.dumps(api.handle_error(AppMoreLikeThisDisabledError()).get_json()) + "\n\n"
except ProviderTokenNotInitError:
yield "data: " + json.dumps(api.handle_error(ProviderNotInitializeError()).get_json()) + "\n\n"
except QuotaExceededError:
yield "data: " + json.dumps(api.handle_error(ProviderQuotaExceededError()).get_json()) + "\n\n"
except ModelCurrentlyNotSupportError:
yield "data: " + json.dumps(api.handle_error(ProviderModelCurrentlyNotSupportError()).get_json()) + "\n\n"
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
yield "data: " + json.dumps(api.handle_error(CompletionRequestError(str(e))).get_json()) + "\n\n"
except ValueError as e:
yield "data: " + json.dumps(api.handle_error(e).get_json()) + "\n\n"
except Exception:
logging.exception("internal server error.")
yield "data: " + json.dumps(api.handle_error(InternalServerError()).get_json()) + "\n\n"
return Response(stream_with_context(generate()), status=200,
mimetype='text/event-stream')
class MessageSuggestedQuestionApi(WebApiResource):
def get(self, app_model, end_user, message_id):
if app_model.mode != 'chat':
            raise NotChatAppError()
message_id = str(message_id)
try:
questions = MessageService.get_suggested_questions_after_answer(
app_model=app_model,
user=end_user,
message_id=message_id
)
except MessageNotExistsError:
raise NotFound("Message not found")
except ConversationNotExistsError:
raise NotFound("Conversation not found")
except SuggestedQuestionsAfterAnswerDisabledError:
raise AppSuggestedQuestionsAfterAnswerDisabledError()
except ProviderTokenNotInitError:
raise ProviderNotInitializeError()
except QuotaExceededError:
raise ProviderQuotaExceededError()
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
except (LLMBadRequestError, LLMAPIConnectionError, LLMAPIUnavailableError,
LLMRateLimitError, LLMAuthorizationError) as e:
raise CompletionRequestError(str(e))
except Exception:
logging.exception("internal server error.")
raise InternalServerError()
return {'data': questions}
api.add_resource(MessageListApi, '/messages')
api.add_resource(MessageFeedbackApi, '/messages/<uuid:message_id>/feedbacks')
api.add_resource(MessageMoreLikeThisApi, '/messages/<uuid:message_id>/more-like-this')
api.add_resource(MessageSuggestedQuestionApi, '/messages/<uuid:message_id>/suggested-questions')


@ -0,0 +1,74 @@
from flask_restful import reqparse, marshal_with, fields
from flask_restful.inputs import int_range
from werkzeug.exceptions import NotFound
from controllers.web import api
from controllers.web.error import NotCompletionAppError
from controllers.web.wraps import WebApiResource
from libs.helper import uuid_value, TimestampField
from services.errors.message import MessageNotExistsError
from services.saved_message_service import SavedMessageService
feedback_fields = {
'rating': fields.String
}
message_fields = {
'id': fields.String,
'inputs': fields.Raw,
'query': fields.String,
'answer': fields.String,
'feedback': fields.Nested(feedback_fields, attribute='user_feedback', allow_null=True),
'created_at': TimestampField
}
class SavedMessageListApi(WebApiResource):
saved_message_infinite_scroll_pagination_fields = {
'limit': fields.Integer,
'has_more': fields.Boolean,
'data': fields.List(fields.Nested(message_fields))
}
@marshal_with(saved_message_infinite_scroll_pagination_fields)
def get(self, app_model, end_user):
if app_model.mode != 'completion':
raise NotCompletionAppError()
parser = reqparse.RequestParser()
parser.add_argument('last_id', type=uuid_value, location='args')
parser.add_argument('limit', type=int_range(1, 100), required=False, default=20, location='args')
args = parser.parse_args()
return SavedMessageService.pagination_by_last_id(app_model, end_user, args['last_id'], args['limit'])
def post(self, app_model, end_user):
if app_model.mode != 'completion':
raise NotCompletionAppError()
parser = reqparse.RequestParser()
parser.add_argument('message_id', type=uuid_value, required=True, location='json')
args = parser.parse_args()
try:
SavedMessageService.save(app_model, end_user, args['message_id'])
except MessageNotExistsError:
            raise NotFound("Message does not exist.")
return {'result': 'success'}
class SavedMessageApi(WebApiResource):
def delete(self, app_model, end_user, message_id):
message_id = str(message_id)
if app_model.mode != 'completion':
raise NotCompletionAppError()
SavedMessageService.delete(app_model, end_user, message_id)
return {'result': 'success'}
api.add_resource(SavedMessageListApi, '/saved-messages')
api.add_resource(SavedMessageApi, '/saved-messages/<uuid:message_id>')


@ -0,0 +1,73 @@
# -*- coding:utf-8 -*-
from flask_restful import fields, marshal_with
from werkzeug.exceptions import Forbidden
from controllers.web import api
from controllers.web.wraps import WebApiResource
from extensions.ext_database import db
from models.model import Site
class AppSiteApi(WebApiResource):
"""Resource for app sites."""
model_config_fields = {
'opening_statement': fields.String,
'suggested_questions': fields.Raw(attribute='suggested_questions_list'),
'suggested_questions_after_answer': fields.Raw(attribute='suggested_questions_after_answer_dict'),
'more_like_this': fields.Raw(attribute='more_like_this_dict'),
'model': fields.Raw(attribute='model_dict'),
'user_input_form': fields.Raw(attribute='user_input_form_list'),
'pre_prompt': fields.String,
}
site_fields = {
'title': fields.String,
'icon': fields.String,
'icon_background': fields.String,
'description': fields.String,
'copyright': fields.String,
'privacy_policy': fields.String,
'default_language': fields.String,
'prompt_public': fields.Boolean
}
app_fields = {
'app_id': fields.String,
'end_user_id': fields.String,
'enable_site': fields.Boolean,
'site': fields.Nested(site_fields),
'model_config': fields.Nested(model_config_fields, allow_null=True),
'plan': fields.String,
}
@marshal_with(app_fields)
def get(self, app_model, end_user):
"""Retrieve app site info."""
# get site
site = db.session.query(Site).filter(Site.app_id == app_model.id).first()
if not site:
raise Forbidden()
return AppSiteInfo(app_model.tenant, app_model, site, end_user.id)
api.add_resource(AppSiteApi, '/site')
class AppSiteInfo:
"""Class to store site information."""
    def __init__(self, tenant, app, site, end_user_id):
        """Initialize AppSiteInfo instance."""
        self.app_id = app.id
        self.end_user_id = end_user_id
self.enable_site = app.enable_site
self.site = site
self.model_config = None
self.plan = tenant.plan
if app.enable_site and site.prompt_public:
app_model_config = app.app_model_config
self.model_config = app_model_config


@ -0,0 +1,107 @@
# -*- coding:utf-8 -*-
import uuid
from functools import wraps
from flask import request, session
from flask_restful import Resource
from werkzeug.exceptions import NotFound, Unauthorized
from extensions.ext_database import db
from models.model import App, Site, EndUser
def validate_token(view=None):
def decorator(view):
@wraps(view)
def decorated(*args, **kwargs):
site = validate_and_get_site()
app_model = db.session.query(App).get(site.app_id)
if not app_model:
raise NotFound()
if app_model.status != 'normal':
raise NotFound()
if not app_model.enable_site:
raise NotFound()
end_user = create_or_update_end_user_for_session(app_model)
return view(app_model, end_user, *args, **kwargs)
return decorated
if view:
return decorator(view)
return decorator
def validate_and_get_site():
"""
    Validate the bearer token and return the matching Site.
"""
auth_header = request.headers.get('Authorization')
if auth_header is None:
raise Unauthorized()
auth_scheme, auth_token = auth_header.split(None, 1)
auth_scheme = auth_scheme.lower()
if auth_scheme != 'bearer':
raise Unauthorized()
site = db.session.query(Site).filter(
Site.code == auth_token,
Site.status == 'normal'
).first()
if not site:
raise NotFound()
return site
def create_or_update_end_user_for_session(app_model):
"""
    Create or fetch the end user bound to the current browser session.
"""
if 'session_id' not in session:
session['session_id'] = generate_session_id()
session_id = session.get('session_id')
end_user = db.session.query(EndUser) \
.filter(
EndUser.session_id == session_id,
EndUser.type == 'browser'
).first()
if end_user is None:
end_user = EndUser(
tenant_id=app_model.tenant_id,
app_id=app_model.id,
type='browser',
is_anonymous=True,
session_id=session_id
)
db.session.add(end_user)
db.session.commit()
return end_user
def generate_session_id():
"""
Generate a unique session ID.
"""
    count = 1
    session_id = ''
    while count != 0:
        session_id = str(uuid.uuid4())
        count = db.session.query(EndUser) \
            .filter(EndUser.session_id == session_id).count()
    return session_id
class WebApiResource(Resource):
method_decorators = [validate_token]
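Subclassing WebApiResource applies validate_token to every HTTP method, so handlers receive the resolved app_model and end_user ahead of any URL parameters. A minimal sketch of a hypothetical resource (not in the commit):

class PingApi(WebApiResource):
    def get(self, app_model, end_user):
        # app_model and end_user are injected by validate_token above
        return {'app_id': app_model.id, 'end_user_id': end_user.id}

# It would be registered like the other web resources:
# api.add_resource(PingApi, '/ping')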

52
api/core/__init__.py Normal file

@ -0,0 +1,52 @@
import os
from typing import Optional
import langchain
from flask import Flask
from jieba.analyse import default_tfidf
from langchain import set_handler
from langchain.prompts.base import DEFAULT_FORMATTER_MAPPING
from llama_index import IndexStructType, QueryMode
from llama_index.indices.registry import INDEX_STRUT_TYPE_TO_QUERY_MAP
from pydantic import BaseModel
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
from core.index.keyword_table.jieba_keyword_table import GPTJIEBAKeywordTableIndex
from core.index.keyword_table.stopwords import STOPWORDS
from core.prompt.prompt_template import OneLineFormatter
from core.vector_store.vector_store import VectorStore
from core.vector_store.vector_store_index_query import EnhanceGPTVectorStoreIndexQuery
class HostedOpenAICredential(BaseModel):
api_key: str
class HostedLLMCredentials(BaseModel):
openai: Optional[HostedOpenAICredential] = None
hosted_llm_credentials = HostedLLMCredentials()
def init_app(app: Flask):
formatter = OneLineFormatter()
DEFAULT_FORMATTER_MAPPING['f-string'] = formatter.format
INDEX_STRUT_TYPE_TO_QUERY_MAP[IndexStructType.KEYWORD_TABLE] = GPTJIEBAKeywordTableIndex.get_query_map()
INDEX_STRUT_TYPE_TO_QUERY_MAP[IndexStructType.WEAVIATE] = {
QueryMode.DEFAULT: EnhanceGPTVectorStoreIndexQuery,
QueryMode.EMBEDDING: EnhanceGPTVectorStoreIndexQuery,
}
INDEX_STRUT_TYPE_TO_QUERY_MAP[IndexStructType.QDRANT] = {
QueryMode.DEFAULT: EnhanceGPTVectorStoreIndexQuery,
QueryMode.EMBEDDING: EnhanceGPTVectorStoreIndexQuery,
}
default_tfidf.stop_words = STOPWORDS
if os.environ.get("DEBUG") and os.environ.get("DEBUG").lower() == 'true':
langchain.verbose = True
set_handler(DifyStdOutCallbackHandler())
if app.config.get("OPENAI_API_KEY"):
hosted_llm_credentials.openai = HostedOpenAICredential(api_key=app.config.get("OPENAI_API_KEY"))


@ -0,0 +1,89 @@
from typing import Optional
from langchain import LLMChain
from langchain.agents import ZeroShotAgent, AgentExecutor, ConversationalAgent
from langchain.callbacks import CallbackManager
from langchain.memory.chat_memory import BaseChatMemory
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
from core.llm.llm_builder import LLMBuilder
class AgentBuilder:
@classmethod
def to_agent_chain(cls, tenant_id: str, tools, memory: Optional[BaseChatMemory],
dataset_tool_callback_handler: DatasetToolCallbackHandler,
agent_loop_gather_callback_handler: AgentLoopGatherCallbackHandler):
llm_callback_manager = CallbackManager([agent_loop_gather_callback_handler, DifyStdOutCallbackHandler()])
llm = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name=agent_loop_gather_callback_handler.model_name,
temperature=0,
max_tokens=1024,
callback_manager=llm_callback_manager
)
tool_callback_manager = CallbackManager([
agent_loop_gather_callback_handler,
dataset_tool_callback_handler,
DifyStdOutCallbackHandler()
])
for tool in tools:
tool.callback_manager = tool_callback_manager
prompt = cls.build_agent_prompt_template(
tools=tools,
memory=memory,
)
agent_llm_chain = LLMChain(
llm=llm,
prompt=prompt,
)
agent = cls.build_agent(agent_llm_chain=agent_llm_chain, memory=memory)
agent_callback_manager = CallbackManager(
[agent_loop_gather_callback_handler, DifyStdOutCallbackHandler()]
)
agent_chain = AgentExecutor.from_agent_and_tools(
tools=tools,
agent=agent,
memory=memory,
callback_manager=agent_callback_manager,
max_iterations=6,
early_stopping_method="generate",
            # `generate` performs one final completion pass after the iteration or time limit is reached
)
return agent_chain
@classmethod
def build_agent_prompt_template(cls, tools, memory: Optional[BaseChatMemory]):
if memory:
prompt = ConversationalAgent.create_prompt(
tools=tools,
)
else:
prompt = ZeroShotAgent.create_prompt(
tools=tools,
)
return prompt
@classmethod
def build_agent(cls, agent_llm_chain: LLMChain, memory: Optional[BaseChatMemory]):
if memory:
agent = ConversationalAgent(
llm_chain=agent_llm_chain
)
else:
agent = ZeroShotAgent(
llm_chain=agent_llm_chain
)
return agent


@ -0,0 +1,178 @@
import logging
import time
from typing import Any, Dict, List, Union, Optional
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult
from core.callback_handler.entity.agent_loop import AgentLoop
from core.conversation_message_task import ConversationMessageTask
class AgentLoopGatherCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
def __init__(self, model_name, conversation_message_task: ConversationMessageTask) -> None:
"""Initialize callback handler."""
self.model_name = model_name
self.conversation_message_task = conversation_message_task
self._agent_loops = []
self._current_loop = None
self.current_chain = None
@property
def agent_loops(self) -> List[AgentLoop]:
return self._agent_loops
def clear_agent_loops(self) -> None:
self._agent_loops = []
self._current_loop = None
@property
def always_verbose(self) -> bool:
"""Whether to call verbose callbacks even if verbose is False."""
return True
@property
def ignore_chain(self) -> bool:
"""Whether to ignore chain callbacks."""
return True
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
"""Print out the prompts."""
# serialized={'name': 'OpenAI'}
# prompts=['Answer the following questions...\nThought:']
# kwargs={}
if not self._current_loop:
# Agent start with a LLM query
self._current_loop = AgentLoop(
position=len(self._agent_loops) + 1,
prompt=prompts[0],
status='llm_started',
started_at=time.perf_counter()
)
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"""Do nothing."""
# kwargs={}
if self._current_loop and self._current_loop.status == 'llm_started':
self._current_loop.status = 'llm_end'
self._current_loop.prompt_tokens = response.llm_output['token_usage']['prompt_tokens']
self._current_loop.completion = response.generations[0][0].text
self._current_loop.completion_tokens = response.llm_output['token_usage']['completion_tokens']
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
logging.error(error)
self._agent_loops = []
self._current_loop = None
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Print out that we are entering a chain."""
pass
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Print out that we finished a chain."""
pass
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
logging.error(error)
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
"""Do nothing."""
# kwargs={'color': 'green', 'llm_prefix': 'Thought:', 'observation_prefix': 'Observation: '}
# input_str='action-input'
# serialized={'description': 'A search engine. Useful for when you need to answer questions about current events. Input should be a search query.', 'name': 'Search'}
pass
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Run on agent action."""
tool = action.tool
tool_input = action.tool_input
action_name_position = action.log.index("\nAction:") + 1 if action.log else -1
thought = action.log[:action_name_position].strip() if action.log else ''
if self._current_loop and self._current_loop.status == 'llm_end':
self._current_loop.status = 'agent_action'
self._current_loop.thought = thought
self._current_loop.tool_name = tool
self._current_loop.tool_input = tool_input
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
"""If not the final action, print out observation."""
# kwargs={'name': 'Search'}
# llm_prefix='Thought:'
# observation_prefix='Observation: '
# output='53 years'
if self._current_loop and self._current_loop.status == 'agent_action' and output and output != 'None':
self._current_loop.status = 'tool_end'
self._current_loop.tool_output = output
self._current_loop.completed = True
self._current_loop.completed_at = time.perf_counter()
self._current_loop.latency = self._current_loop.completed_at - self._current_loop.started_at
self.conversation_message_task.on_agent_end(self.current_chain, self.model_name, self._current_loop)
self._agent_loops.append(self._current_loop)
self._current_loop = None
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
logging.error(error)
self._agent_loops = []
self._current_loop = None
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Optional[str],
) -> None:
"""Run on additional input from chains and agents."""
pass
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
"""Run on agent end."""
# Final Answer
if self._current_loop and (self._current_loop.status == 'llm_end' or self._current_loop.status == 'agent_action'):
self._current_loop.status = 'agent_finish'
self._current_loop.completed = True
self._current_loop.completed_at = time.perf_counter()
self._current_loop.latency = self._current_loop.completed_at - self._current_loop.started_at
self.conversation_message_task.on_agent_end(self.current_chain, self.model_name, self._current_loop)
self._agent_loops.append(self._current_loop)
self._current_loop = None
elif not self._current_loop and self._agent_loops:
self._agent_loops[-1].status = 'agent_finish'
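For reference, a summary of the status lifecycle the handler above drives each AgentLoop through (derived from the callbacks, not part of the commit):

# on_llm_start     -> 'llm_started'  (loop created, prompt captured)
# on_llm_end       -> 'llm_end'      (completion text and token usage recorded)
# on_agent_action  -> 'agent_action' (thought, tool name and tool input recorded)
# on_tool_end      -> 'tool_end'     (tool output recorded; loop saved and cleared)
# on_agent_finish  -> 'agent_finish' (final answer closes any pending loop)
# on_llm_error / on_tool_error reset both the current loop and the gathered list.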


@ -0,0 +1,117 @@
import logging
from typing import Any, Dict, List, Union, Optional
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult
from core.callback_handler.entity.dataset_query import DatasetQueryObj
from core.conversation_message_task import ConversationMessageTask
class DatasetToolCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
def __init__(self, conversation_message_task: ConversationMessageTask) -> None:
"""Initialize callback handler."""
self.queries = []
self.conversation_message_task = conversation_message_task
@property
def always_verbose(self) -> bool:
"""Whether to call verbose callbacks even if verbose is False."""
return True
@property
def ignore_llm(self) -> bool:
"""Whether to ignore LLM callbacks."""
return True
@property
def ignore_chain(self) -> bool:
"""Whether to ignore chain callbacks."""
return True
@property
def ignore_agent(self) -> bool:
"""Whether to ignore agent callbacks."""
return False
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
tool_name = serialized.get('name')
dataset_id = tool_name[len("dataset-"):]
self.conversation_message_task.on_dataset_query_end(DatasetQueryObj(dataset_id=dataset_id, query=input_str))
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
# kwargs={'name': 'Search'}
# llm_prefix='Thought:'
# observation_prefix='Observation: '
# output='53 years'
pass
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
logging.error(error)
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
pass
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
pass
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
pass
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
pass
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
pass
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
logging.error(error)
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
pass
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Optional[str],
) -> None:
"""Run on additional input from chains and agents."""
pass
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
"""Run on agent end."""
pass


@ -0,0 +1,23 @@
from pydantic import BaseModel
class AgentLoop(BaseModel):
position: int = 1
thought: str = None
tool_name: str = None
tool_input: str = None
tool_output: str = None
prompt: str = None
prompt_tokens: int = None
completion: str = None
completion_tokens: int = None
latency: float = None
status: str = 'llm_started'
completed: bool = False
started_at: float = None
completed_at: float = None


@ -0,0 +1,16 @@
from pydantic import BaseModel
class ChainResult(BaseModel):
type: str = None
prompt: dict = None
completion: dict = None
status: str = 'chain_started'
completed: bool = False
started_at: float = None
completed_at: float = None
agent_result: dict = None
"""only when type is 'AgentExecutor'"""


@ -0,0 +1,6 @@
from pydantic import BaseModel
class DatasetQueryObj(BaseModel):
dataset_id: str = None
query: str = None


@ -0,0 +1,9 @@
from pydantic import BaseModel
class LLMMessage(BaseModel):
prompt: str = ''
prompt_tokens: int = 0
completion: str = ''
completion_tokens: int = 0
latency: float = 0.0


@ -0,0 +1,38 @@
from llama_index import Response
from extensions.ext_database import db
from models.dataset import DocumentSegment
class IndexToolCallbackHandler:
def __init__(self) -> None:
self._response = None
@property
def response(self) -> Response:
return self._response
def on_tool_end(self, response: Response) -> None:
"""Handle tool end."""
self._response = response
class DatasetIndexToolCallbackHandler(IndexToolCallbackHandler):
"""Callback handler for dataset tool."""
def __init__(self, dataset_id: str) -> None:
super().__init__()
self.dataset_id = dataset_id
def on_tool_end(self, response: Response) -> None:
"""Handle tool end."""
for node in response.source_nodes:
index_node_id = node.node.doc_id
# add hit count to document segment
db.session.query(DocumentSegment).filter(
DocumentSegment.dataset_id == self.dataset_id,
DocumentSegment.index_node_id == index_node_id
).update({DocumentSegment.hit_count: DocumentSegment.hit_count + 1}, synchronize_session=False)


@ -0,0 +1,147 @@
import logging
import time
from typing import Any, Dict, List, Union, Optional
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult, HumanMessage, AIMessage, SystemMessage
from core.callback_handler.entity.llm_message import LLMMessage
from core.conversation_message_task import ConversationMessageTask, ConversationTaskStoppedException
from core.llm.streamable_chat_open_ai import StreamableChatOpenAI
from core.llm.streamable_open_ai import StreamableOpenAI
class LLMCallbackHandler(BaseCallbackHandler):
def __init__(self, llm: Union[StreamableOpenAI, StreamableChatOpenAI],
conversation_message_task: ConversationMessageTask):
self.llm = llm
self.llm_message = LLMMessage()
self.start_at = None
self.conversation_message_task = conversation_message_task
@property
def always_verbose(self) -> bool:
"""Whether to call verbose callbacks even if verbose is False."""
return True
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
self.start_at = time.perf_counter()
if 'Chat' in serialized['name']:
real_prompts = []
messages = []
for prompt in prompts:
role, content = prompt.split(': ', maxsplit=1)
if role == 'human':
role = 'user'
message = HumanMessage(content=content)
elif role == 'ai':
role = 'assistant'
message = AIMessage(content=content)
else:
message = SystemMessage(content=content)
real_prompt = {
"role": role,
"text": content
}
real_prompts.append(real_prompt)
messages.append(message)
self.llm_message.prompt = real_prompts
self.llm_message.prompt_tokens = self.llm.get_messages_tokens(messages)
else:
self.llm_message.prompt = [{
"role": 'user',
"text": prompts[0]
}]
self.llm_message.prompt_tokens = self.llm.get_num_tokens(prompts[0])
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
end_at = time.perf_counter()
self.llm_message.latency = end_at - self.start_at
if not self.conversation_message_task.streaming:
self.conversation_message_task.append_message_text(response.generations[0][0].text)
self.llm_message.completion = response.generations[0][0].text
self.llm_message.completion_tokens = response.llm_output['token_usage']['completion_tokens']
else:
self.llm_message.completion_tokens = self.llm.get_num_tokens(self.llm_message.completion)
self.conversation_message_task.save_message(self.llm_message)
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
self.conversation_message_task.append_message_text(token)
self.llm_message.completion += token
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
if isinstance(error, ConversationTaskStoppedException):
if self.conversation_message_task.streaming:
end_at = time.perf_counter()
self.llm_message.latency = end_at - self.start_at
self.llm_message.completion_tokens = self.llm.get_num_tokens(self.llm_message.completion)
self.conversation_message_task.save_message(llm_message=self.llm_message, by_stopped=True)
else:
logging.error(error)
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
pass
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
pass
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
pass
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
pass
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
pass
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
pass
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
pass
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Optional[str],
) -> None:
pass
def on_agent_finish(
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
pass


@ -0,0 +1,137 @@
import logging
import time
from typing import Any, Dict, List, Union, Optional
from langchain.callbacks.base import BaseCallbackHandler
from langchain.schema import AgentAction, AgentFinish, LLMResult
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
from core.callback_handler.entity.chain_result import ChainResult
from core.constant import llm_constant
from core.conversation_message_task import ConversationMessageTask
class MainChainGatherCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
def __init__(self, conversation_message_task: ConversationMessageTask) -> None:
"""Initialize callback handler."""
self._current_chain_result = None
self._current_chain_message = None
self.conversation_message_task = conversation_message_task
self.agent_loop_gather_callback_handler = AgentLoopGatherCallbackHandler(
llm_constant.agent_model_name,
conversation_message_task
)
def clear_chain_results(self) -> None:
self._current_chain_result = None
self._current_chain_message = None
self.agent_loop_gather_callback_handler.current_chain = None
@property
def always_verbose(self) -> bool:
"""Whether to call verbose callbacks even if verbose is False."""
return True
@property
def ignore_llm(self) -> bool:
"""Whether to ignore LLM callbacks."""
return True
@property
def ignore_agent(self) -> bool:
"""Whether to ignore agent callbacks."""
return True
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Print out that we are entering a chain."""
if not self._current_chain_result:
self._current_chain_result = ChainResult(
type=serialized['name'],
prompt=inputs,
started_at=time.perf_counter()
)
self._current_chain_message = self.conversation_message_task.init_chain(self._current_chain_result)
self.agent_loop_gather_callback_handler.current_chain = self._current_chain_message
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Print out that we finished a chain."""
if self._current_chain_result and self._current_chain_result.status == 'chain_started':
self._current_chain_result.status = 'chain_ended'
self._current_chain_result.completion = outputs
self._current_chain_result.completed = True
self._current_chain_result.completed_at = time.perf_counter()
self.conversation_message_task.on_chain_end(self._current_chain_message, self._current_chain_result)
self.clear_chain_results()
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
logging.error(error)
self.clear_chain_results()
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
pass
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
pass
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
logging.error(error)
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
pass
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
pass
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
pass
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
logging.error(error)
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Optional[str],
) -> None:
"""Run on additional input from chains and agents."""
pass
def on_agent_finish(self, finish: AgentFinish, **kwargs: Any) -> Any:
"""Run on agent end."""
pass


@ -0,0 +1,127 @@
import sys
from typing import Any, Dict, List, Optional, Union
from langchain.callbacks.base import BaseCallbackHandler
from langchain.input import print_text
from langchain.schema import AgentAction, AgentFinish, LLMResult
class DifyStdOutCallbackHandler(BaseCallbackHandler):
"""Callback Handler that prints to std out."""
def __init__(self, color: Optional[str] = None) -> None:
"""Initialize callback handler."""
self.color = color
def on_llm_start(
self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
) -> None:
"""Print out the prompts."""
print_text("\n[on_llm_start]\n", color='blue')
if 'Chat' in serialized['name']:
for prompt in prompts:
print_text(prompt + "\n", color='blue')
else:
print_text(prompts[0] + "\n", color='blue')
def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
"""Do nothing."""
print_text("\n[on_llm_end]\nOutput: " + str(response.generations[0][0].text) + "\nllm_output: " + str(
response.llm_output) + "\n", color='blue')
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Do nothing."""
pass
def on_llm_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
print_text("\n[on_llm_error]\nError: " + str(error) + "\n", color='blue')
def on_chain_start(
self, serialized: Dict[str, Any], inputs: Dict[str, Any], **kwargs: Any
) -> None:
"""Print out that we are entering a chain."""
class_name = serialized["name"]
print_text("\n[on_chain_start]\nChain: " + class_name + "\nInputs: " + str(inputs) + "\n", color='pink')
def on_chain_end(self, outputs: Dict[str, Any], **kwargs: Any) -> None:
"""Print out that we finished a chain."""
print_text("\n[on_chain_end]\nOutputs: " + str(outputs) + "\n", color='pink')
def on_chain_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
print_text("\n[on_chain_error]\nError: " + str(error) + "\n", color='pink')
def on_tool_start(
self,
serialized: Dict[str, Any],
input_str: str,
**kwargs: Any,
) -> None:
"""Do nothing."""
print_text("\n[on_tool_start] " + str(serialized), color='yellow')
def on_agent_action(
self, action: AgentAction, color: Optional[str] = None, **kwargs: Any
) -> Any:
"""Run on agent action."""
tool = action.tool
tool_input = action.tool_input
action_name_position = action.log.index("\nAction:") + 1 if action.log else -1
thought = action.log[:action_name_position].strip() if action.log else ''
log = f"Thought: {thought}\nTool: {tool}\nTool Input: {tool_input}"
print_text("\n[on_agent_action]\n" + log + "\n", color='green')
def on_tool_end(
self,
output: str,
color: Optional[str] = None,
observation_prefix: Optional[str] = None,
llm_prefix: Optional[str] = None,
**kwargs: Any,
) -> None:
"""If not the final action, print out observation."""
print_text("\n[on_tool_end]\n", color='yellow')
if observation_prefix:
print_text(f"\n{observation_prefix}")
print_text(output, color='yellow')
if llm_prefix:
print_text(f"\n{llm_prefix}")
print_text("\n")
def on_tool_error(
self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
) -> None:
"""Do nothing."""
print_text("\n[on_tool_error] Error: " + str(error) + "\n", color='yellow')
def on_text(
self,
text: str,
color: Optional[str] = None,
end: str = "",
**kwargs: Optional[str],
) -> None:
"""Run when agent ends."""
print_text("\n[on_text] " + text + "\n", color=color if color else self.color, end=end)
def on_agent_finish(
self, finish: AgentFinish, color: Optional[str] = None, **kwargs: Any
) -> None:
"""Run on agent end."""
print_text("[on_agent_finish] " + finish.return_values['output'] + "\n", color='green', end="\n")
class DifyStreamingStdOutCallbackHandler(DifyStdOutCallbackHandler):
"""Callback handler for streaming. Only works with LLMs that support streaming."""
def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
"""Run on new LLM token. Only available when streaming is enabled."""
sys.stdout.write(token)
sys.stdout.flush()
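A sketch of wiring these handlers into an LLM, assuming the pre-0.1 langchain CallbackManager API that this commit pins, and an OPENAI_API_KEY in the environment:

from langchain.callbacks import CallbackManager
from langchain.llms import OpenAI

llm = OpenAI(
    temperature=0,
    streaming=True,
    callback_manager=CallbackManager([DifyStreamingStdOutCallbackHandler()]),
)
llm("Say hi")  # tokens are echoed to stdout as they arrive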


@ -0,0 +1,34 @@
from typing import Optional
from langchain.callbacks import CallbackManager
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
from core.chain.sensitive_word_avoidance_chain import SensitiveWordAvoidanceChain
from core.chain.tool_chain import ToolChain
class ChainBuilder:
@classmethod
def to_tool_chain(cls, tool, **kwargs) -> ToolChain:
return ToolChain(
tool=tool,
input_key=kwargs.get('input_key', 'input'),
output_key=kwargs.get('output_key', 'tool_output'),
callback_manager=CallbackManager([DifyStdOutCallbackHandler()])
)
@classmethod
def to_sensitive_word_avoidance_chain(cls, tool_config: dict, **kwargs) -> Optional[
SensitiveWordAvoidanceChain]:
sensitive_words = tool_config.get("words", "")
if tool_config.get("enabled", False) \
and sensitive_words:
return SensitiveWordAvoidanceChain(
sensitive_words=sensitive_words.split(","),
canned_response=tool_config.get("canned_response", ''),
output_key="sensitive_word_avoidance_output",
callback_manager=CallbackManager([DifyStdOutCallbackHandler()]),
**kwargs
)
return None
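The tool_config dict read by to_sensitive_word_avoidance_chain matches the per-tool entries of agent_mode consumed by MainChainBuilder below. Its assumed shape, inferred from the reads above (values are placeholders):

tool_config = {
    "enabled": True,
    "words": "badword1,badword2",  # comma-separated sensitive words
    "canned_response": "I can't answer that.",
}
chain = ChainBuilder.to_sensitive_word_avoidance_chain(tool_config)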


@ -0,0 +1,116 @@
from typing import Optional, List
from langchain.callbacks import SharedCallbackManager
from langchain.chains import SequentialChain
from langchain.chains.base import Chain
from langchain.memory.chat_memory import BaseChatMemory
from core.agent.agent_builder import AgentBuilder
from core.callback_handler.agent_loop_gather_callback_handler import AgentLoopGatherCallbackHandler
from core.callback_handler.dataset_tool_callback_handler import DatasetToolCallbackHandler
from core.callback_handler.main_chain_gather_callback_handler import MainChainGatherCallbackHandler
from core.chain.chain_builder import ChainBuilder
from core.constant import llm_constant
from core.conversation_message_task import ConversationMessageTask
from core.tool.dataset_tool_builder import DatasetToolBuilder
class MainChainBuilder:
@classmethod
def to_langchain_components(cls, tenant_id: str, agent_mode: dict, memory: Optional[BaseChatMemory],
conversation_message_task: ConversationMessageTask):
first_input_key = "input"
final_output_key = "output"
chains = []
chain_callback_handler = MainChainGatherCallbackHandler(conversation_message_task)
# agent mode
tool_chains, chains_output_key = cls.get_agent_chains(
tenant_id=tenant_id,
agent_mode=agent_mode,
memory=memory,
dataset_tool_callback_handler=DatasetToolCallbackHandler(conversation_message_task),
agent_loop_gather_callback_handler=chain_callback_handler.agent_loop_gather_callback_handler
)
chains += tool_chains
if chains_output_key:
final_output_key = chains_output_key
if len(chains) == 0:
return None
for chain in chains:
# do not add handler into singleton callback manager
if not isinstance(chain.callback_manager, SharedCallbackManager):
chain.callback_manager.add_handler(chain_callback_handler)
# build main chain
overall_chain = SequentialChain(
chains=chains,
input_variables=[first_input_key],
output_variables=[final_output_key],
            memory=memory,  # used only to supply the memory prompt input key
)
return overall_chain
@classmethod
def get_agent_chains(cls, tenant_id: str, agent_mode: dict, memory: Optional[BaseChatMemory],
dataset_tool_callback_handler: DatasetToolCallbackHandler,
agent_loop_gather_callback_handler: AgentLoopGatherCallbackHandler):
# agent mode
chains = []
if agent_mode and agent_mode.get('enabled'):
tools = agent_mode.get('tools', [])
pre_fixed_chains = []
agent_tools = []
for tool in tools:
tool_type = list(tool.keys())[0]
tool_config = list(tool.values())[0]
if tool_type == 'sensitive-word-avoidance':
chain = ChainBuilder.to_sensitive_word_avoidance_chain(tool_config)
if chain:
pre_fixed_chains.append(chain)
elif tool_type == "dataset":
dataset_tool = DatasetToolBuilder.build_dataset_tool(
tenant_id=tenant_id,
dataset_id=tool_config.get("id"),
response_mode='no_synthesizer', # "compact"
callback_handler=dataset_tool_callback_handler
)
if dataset_tool:
agent_tools.append(dataset_tool)
# add pre-fixed chains
chains += pre_fixed_chains
if len(agent_tools) == 1:
# tool to chain
tool_chain = ChainBuilder.to_tool_chain(tool=agent_tools[0], output_key='tool_output')
chains.append(tool_chain)
elif len(agent_tools) > 1:
# build agent config
agent_chain = AgentBuilder.to_agent_chain(
tenant_id=tenant_id,
tools=agent_tools,
memory=memory,
dataset_tool_callback_handler=dataset_tool_callback_handler,
agent_loop_gather_callback_handler=agent_loop_gather_callback_handler
)
chains.append(agent_chain)
final_output_key = cls.get_chains_output_key(chains)
return chains, final_output_key
@classmethod
def get_chains_output_key(cls, chains: List[Chain]):
if len(chains) > 0:
return chains[-1].output_keys[0]
return None
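The agent_mode dict consumed by to_langchain_components, as inferred from get_agent_chains above: each tools entry is a single-key dict keyed by tool type (ids are placeholders):

agent_mode = {
    "enabled": True,
    "tools": [
        {"sensitive-word-avoidance": {"enabled": True, "words": "foo,bar",
                                      "canned_response": "..."}},
        {"dataset": {"id": "<dataset-uuid>"}},
    ],
}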


@ -0,0 +1,42 @@
from typing import List, Dict
from langchain.chains.base import Chain
class SensitiveWordAvoidanceChain(Chain):
input_key: str = "input" #: :meta private:
output_key: str = "output" #: :meta private:
sensitive_words: List[str] = []
canned_response: str = None
@property
def _chain_type(self) -> str:
return "sensitive_word_avoidance_chain"
@property
def input_keys(self) -> List[str]:
"""Expect input key.
:meta private:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""Return output key.
:meta private:
"""
return [self.output_key]
def _check_sensitive_word(self, text: str) -> str:
for word in self.sensitive_words:
if word in text:
return self.canned_response
return text
def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
text = inputs[self.input_key]
output = self._check_sensitive_word(text)
return {self.output_key: output}
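A quick usage sketch of the chain in isolation, using the Chain.__call__ convention of this langchain version (the call returns the inputs merged with the outputs):

chain = SensitiveWordAvoidanceChain(
    sensitive_words=["secret"],
    canned_response="I can't discuss that.",
)
print(chain({"input": "tell me the secret"})["output"])  # -> canned response
print(chain({"input": "hello"})["output"])               # -> "hello"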


@ -0,0 +1,42 @@
from typing import List, Dict
from langchain.chains.base import Chain
from langchain.tools import BaseTool
class ToolChain(Chain):
input_key: str = "input" #: :meta private:
output_key: str = "output" #: :meta private:
tool: BaseTool
@property
def _chain_type(self) -> str:
return "tool_chain"
@property
def input_keys(self) -> List[str]:
"""Expect input key.
:meta private:
"""
return [self.input_key]
@property
def output_keys(self) -> List[str]:
"""Return output key.
:meta private:
"""
return [self.output_key]
def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        tool_input = inputs[self.input_key]
        output = self.tool.run(tool_input, self.verbose)
return {self.output_key: output}
async def _acall(self, inputs: Dict[str, str]) -> Dict[str, str]:
"""Run the logic of this chain and return the output."""
        tool_input = inputs[self.input_key]
        output = await self.tool.arun(tool_input, self.verbose)
return {self.output_key: output}

326
api/core/completion.py Normal file

@ -0,0 +1,326 @@
from typing import Optional, List, Union
from langchain.callbacks import CallbackManager
from langchain.chat_models.base import BaseChatModel
from langchain.llms import BaseLLM
from langchain.schema import BaseMessage, BaseLanguageModel, HumanMessage
from core.constant import llm_constant
from core.callback_handler.llm_callback_handler import LLMCallbackHandler
from core.callback_handler.std_out_callback_handler import DifyStreamingStdOutCallbackHandler, \
DifyStdOutCallbackHandler
from core.conversation_message_task import ConversationMessageTask, ConversationTaskStoppedException
from core.llm.error import LLMBadRequestError
from core.llm.llm_builder import LLMBuilder
from core.chain.main_chain_builder import MainChainBuilder
from core.llm.streamable_chat_open_ai import StreamableChatOpenAI
from core.llm.streamable_open_ai import StreamableOpenAI
from core.memory.read_only_conversation_token_db_buffer_shared_memory import \
ReadOnlyConversationTokenDBBufferSharedMemory
from core.memory.read_only_conversation_token_db_string_buffer_shared_memory import \
ReadOnlyConversationTokenDBStringBufferSharedMemory
from core.prompt.prompt_builder import PromptBuilder
from core.prompt.prompt_template import OutLinePromptTemplate
from core.prompt.prompts import MORE_LIKE_THIS_GENERATE_PROMPT
from models.model import App, AppModelConfig, Account, Conversation, Message
class Completion:
@classmethod
def generate(cls, task_id: str, app: App, app_model_config: AppModelConfig, query: str, inputs: dict,
user: Account, conversation: Optional[Conversation], streaming: bool, is_override: bool = False):
"""
errors: ProviderTokenNotInitError
"""
cls.validate_query_tokens(app.tenant_id, app_model_config, query)
memory = None
if conversation:
# get memory of conversation (read-only)
memory = cls.get_memory_from_conversation(
tenant_id=app.tenant_id,
app_model_config=app_model_config,
conversation=conversation
)
inputs = conversation.inputs
conversation_message_task = ConversationMessageTask(
task_id=task_id,
app=app,
app_model_config=app_model_config,
user=user,
conversation=conversation,
is_override=is_override,
inputs=inputs,
query=query,
streaming=streaming
)
# build main chain include agent
main_chain = MainChainBuilder.to_langchain_components(
tenant_id=app.tenant_id,
agent_mode=app_model_config.agent_mode_dict,
memory=ReadOnlyConversationTokenDBStringBufferSharedMemory(memory=memory) if memory else None,
conversation_message_task=conversation_message_task
)
chain_output = ''
if main_chain:
chain_output = main_chain.run(query)
# run the final llm
try:
cls.run_final_llm(
tenant_id=app.tenant_id,
mode=app.mode,
app_model_config=app_model_config,
query=query,
inputs=inputs,
chain_output=chain_output,
conversation_message_task=conversation_message_task,
memory=memory,
streaming=streaming
)
except ConversationTaskStoppedException:
return
@classmethod
def run_final_llm(cls, tenant_id: str, mode: str, app_model_config: AppModelConfig, query: str, inputs: dict,
chain_output: str,
conversation_message_task: ConversationMessageTask,
memory: Optional[ReadOnlyConversationTokenDBBufferSharedMemory], streaming: bool):
final_llm = LLMBuilder.to_llm_from_model(
tenant_id=tenant_id,
model=app_model_config.model_dict,
streaming=streaming
)
# get llm prompt
prompt = cls.get_main_llm_prompt(
mode=mode,
llm=final_llm,
pre_prompt=app_model_config.pre_prompt,
query=query,
inputs=inputs,
chain_output=chain_output,
memory=memory
)
final_llm.callback_manager = cls.get_llm_callback_manager(final_llm, streaming, conversation_message_task)
cls.recale_llm_max_tokens(
final_llm=final_llm,
prompt=prompt,
mode=mode
)
response = final_llm.generate([prompt])
return response
@classmethod
def get_main_llm_prompt(cls, mode: str, llm: BaseLanguageModel, pre_prompt: str, query: str, inputs: dict, chain_output: Optional[str],
memory: Optional[ReadOnlyConversationTokenDBBufferSharedMemory]) -> \
Union[str, List[BaseMessage]]:
pre_prompt = PromptBuilder.process_template(pre_prompt) if pre_prompt else pre_prompt
if mode == 'completion':
prompt_template = OutLinePromptTemplate.from_template(
template=("Use the following pieces of [CONTEXT] to answer the question at the end. "
"If you don't know the answer, "
"just say that you don't know, don't try to make up an answer. \n"
"```\n"
"[CONTEXT]\n"
"{context}\n"
"```\n" if chain_output else "")
+ (pre_prompt + "\n" if pre_prompt else "")
+ "{query}\n"
)
if chain_output:
inputs['context'] = chain_output
prompt_inputs = {k: inputs[k] for k in prompt_template.input_variables if k in inputs}
prompt_content = prompt_template.format(
query=query,
**prompt_inputs
)
if isinstance(llm, BaseChatModel):
# use chat llm as completion model
return [HumanMessage(content=prompt_content)]
else:
return prompt_content
else:
messages: List[BaseMessage] = []
system_message = None
if pre_prompt:
# append pre prompt as system message
system_message = PromptBuilder.to_system_message(pre_prompt, inputs)
if chain_output:
# append context as system message, currently only use simple stuff prompt
context_message = PromptBuilder.to_system_message(
"""Use the following pieces of [CONTEXT] to answer the users question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
```
[CONTEXT]
{context}
```""",
{'context': chain_output}
)
if not system_message:
system_message = context_message
else:
system_message.content = context_message.content + "\n\n" + system_message.content
if system_message:
messages.append(system_message)
human_inputs = {
"query": query
}
# construct main prompt
human_message = PromptBuilder.to_human_message(
prompt_content="{query}",
inputs=human_inputs
)
if memory:
# append chat histories
tmp_messages = messages.copy() + [human_message]
curr_message_tokens = memory.llm.get_messages_tokens(tmp_messages)
rest_tokens = llm_constant.max_context_token_length[
memory.llm.model_name] - memory.llm.max_tokens - curr_message_tokens
rest_tokens = max(rest_tokens, 0)
history_messages = cls.get_history_messages_from_memory(memory, rest_tokens)
messages += history_messages
messages.append(human_message)
return messages
@classmethod
def get_llm_callback_manager(cls, llm: Union[StreamableOpenAI, StreamableChatOpenAI],
streaming: bool, conversation_message_task: ConversationMessageTask) -> CallbackManager:
llm_callback_handler = LLMCallbackHandler(llm, conversation_message_task)
if streaming:
callback_handlers = [llm_callback_handler, DifyStreamingStdOutCallbackHandler()]
else:
callback_handlers = [llm_callback_handler, DifyStdOutCallbackHandler()]
return CallbackManager(callback_handlers)
@classmethod
def get_history_messages_from_memory(cls, memory: ReadOnlyConversationTokenDBBufferSharedMemory,
max_token_limit: int) -> \
List[BaseMessage]:
"""Get memory messages."""
memory.max_token_limit = max_token_limit
memory_key = memory.memory_variables[0]
external_context = memory.load_memory_variables({})
return external_context[memory_key]
@classmethod
def get_memory_from_conversation(cls, tenant_id: str, app_model_config: AppModelConfig,
conversation: Conversation,
**kwargs) -> ReadOnlyConversationTokenDBBufferSharedMemory:
# only for calc token in memory
memory_llm = LLMBuilder.to_llm_from_model(
tenant_id=tenant_id,
model=app_model_config.model_dict
)
# use llm config from conversation
memory = ReadOnlyConversationTokenDBBufferSharedMemory(
conversation=conversation,
llm=memory_llm,
max_token_limit=kwargs.get("max_token_limit", 2048),
memory_key=kwargs.get("memory_key", "chat_history"),
return_messages=kwargs.get("return_messages", True),
input_key=kwargs.get("input_key", "input"),
output_key=kwargs.get("output_key", "output"),
message_limit=kwargs.get("message_limit", 10),
)
return memory
@classmethod
def validate_query_tokens(cls, tenant_id: str, app_model_config: AppModelConfig, query: str):
llm = LLMBuilder.to_llm_from_model(
tenant_id=tenant_id,
model=app_model_config.model_dict
)
model_limited_tokens = llm_constant.max_context_token_length[llm.model_name]
max_tokens = llm.max_tokens
if model_limited_tokens - max_tokens - llm.get_num_tokens(query) < 0:
raise LLMBadRequestError("Query is too long")
@classmethod
def recale_llm_max_tokens(cls, final_llm: Union[StreamableOpenAI, StreamableChatOpenAI],
prompt: Union[str, List[BaseMessage]], mode: str):
# recalc max_tokens if sum(prompt_token + max_tokens) over model token limit
model_limited_tokens = llm_constant.max_context_token_length[final_llm.model_name]
max_tokens = final_llm.max_tokens
if mode == 'completion' and isinstance(final_llm, BaseLLM):
prompt_tokens = final_llm.get_num_tokens(prompt)
else:
prompt_tokens = final_llm.get_messages_tokens(prompt)
if prompt_tokens + max_tokens > model_limited_tokens:
max_tokens = max(model_limited_tokens - prompt_tokens, 16)
final_llm.max_tokens = max_tokens
@classmethod
def generate_more_like_this(cls, task_id: str, app: App, message: Message, pre_prompt: str,
app_model_config: AppModelConfig, user: Account, streaming: bool):
llm: StreamableOpenAI = LLMBuilder.to_llm(
tenant_id=app.tenant_id,
model_name='gpt-3.5-turbo',
streaming=streaming
)
# get llm prompt
original_prompt = cls.get_main_llm_prompt(
mode="completion",
llm=llm,
pre_prompt=pre_prompt,
query=message.query,
inputs=message.inputs,
chain_output=None,
memory=None
)
original_completion = message.answer.strip()
prompt = MORE_LIKE_THIS_GENERATE_PROMPT
prompt = prompt.format(prompt=original_prompt, original_completion=original_completion)
if isinstance(llm, BaseChatModel):
prompt = [HumanMessage(content=prompt)]
conversation_message_task = ConversationMessageTask(
task_id=task_id,
app=app,
app_model_config=app_model_config,
user=user,
inputs=message.inputs,
query=message.query,
is_override=True if message.override_model_configs else False,
streaming=streaming
)
llm.callback_manager = cls.get_llm_callback_manager(llm, streaming, conversation_message_task)
cls.recale_llm_max_tokens(
final_llm=llm,
prompt=prompt,
mode='completion'
)
llm.generate([prompt])
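
A worked example of the recale_llm_max_tokens budget logic above, with illustrative numbers for gpt-3.5-turbo:

model_limited_tokens = 4096  # gpt-3.5-turbo context window (see llm_constant)
prompt_tokens = 3000         # tokens consumed by the assembled prompt
max_tokens = 1500            # requested completion budget

# prompt + completion would overflow the context window, so the budget shrinks
if prompt_tokens + max_tokens > model_limited_tokens:
    max_tokens = max(model_limited_tokens - prompt_tokens, 16)

print(max_tokens)  # -> 1096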

84
api/core/constant/llm_constant.py Normal file
View File

@ -0,0 +1,84 @@
from decimal import Decimal
models = {
'gpt-4': 'openai', # 8,192 tokens
'gpt-4-32k': 'openai', # 32,768 tokens
'gpt-3.5-turbo': 'openai', # 4,096 tokens
'text-davinci-003': 'openai', # 4,097 tokens
'text-davinci-002': 'openai', # 4,097 tokens
'text-curie-001': 'openai', # 2,049 tokens
'text-babbage-001': 'openai', # 2,049 tokens
'text-ada-001': 'openai', # 2,049 tokens
'text-embedding-ada-002': 'openai' # 8191 tokens, 1536 dimensions
}
max_context_token_length = {
'gpt-4': 8192,
'gpt-4-32k': 32768,
'gpt-3.5-turbo': 4096,
'text-davinci-003': 4097,
'text-davinci-002': 4097,
'text-curie-001': 2049,
'text-babbage-001': 2049,
'text-ada-001': 2049,
'text-embedding-ada-002': 8191
}
models_by_mode = {
'chat': [
'gpt-4', # 8,192 tokens
'gpt-4-32k', # 32,768 tokens
'gpt-3.5-turbo', # 4,096 tokens
],
'completion': [
'gpt-4', # 8,192 tokens
'gpt-4-32k', # 32,768 tokens
'gpt-3.5-turbo', # 4,096 tokens
'text-davinci-003', # 4,097 tokens
'text-davinci-002', # 4,097 tokens
'text-curie-001', # 2,049 tokens
'text-babbage-001', # 2,049 tokens
'text-ada-001' # 2,049 tokens
],
'embedding': [
'text-embedding-ada-002' # 8191 tokens, 1536 dimensions
]
}
model_currency = 'USD'
model_prices = {
'gpt-4': {
'prompt': Decimal('0.03'),
'completion': Decimal('0.06'),
},
'gpt-4-32k': {
'prompt': Decimal('0.06'),
'completion': Decimal('0.12')
},
'gpt-3.5-turbo': {
'prompt': Decimal('0.002'),
'completion': Decimal('0.002')
},
'text-davinci-003': {
'prompt': Decimal('0.02'),
'completion': Decimal('0.02')
},
'text-curie-001': {
'prompt': Decimal('0.002'),
'completion': Decimal('0.002')
},
'text-babbage-001': {
'prompt': Decimal('0.0005'),
'completion': Decimal('0.0005')
},
'text-ada-001': {
'prompt': Decimal('0.0004'),
'completion': Decimal('0.0004')
},
'text-embedding-ada-002': {
'usage': Decimal('0.0004'),
}
}
agent_model_name = 'text-davinci-003'
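
Prices above are USD per 1K tokens; a quick sketch of how a request cost falls out of these tables (token counts are illustrative):

from decimal import Decimal

prompt_price = Decimal('0.002')      # gpt-3.5-turbo, per 1K prompt tokens
completion_price = Decimal('0.002')  # gpt-3.5-turbo, per 1K completion tokens

prompt_tokens, completion_tokens = 1200, 350
cost = ((Decimal(prompt_tokens) / 1000) * prompt_price
        + (Decimal(completion_tokens) / 1000) * completion_price)
print(cost)  # -> 0.00310 (USD)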

388
api/core/conversation_message_task.py Normal file
View File

@ -0,0 +1,388 @@
import decimal
import json
from typing import Optional, Union
from core.callback_handler.entity.agent_loop import AgentLoop
from core.callback_handler.entity.dataset_query import DatasetQueryObj
from core.callback_handler.entity.llm_message import LLMMessage
from core.callback_handler.entity.chain_result import ChainResult
from core.constant import llm_constant
from core.llm.llm_builder import LLMBuilder
from core.llm.provider.llm_provider_service import LLMProviderService
from core.prompt.prompt_builder import PromptBuilder
from core.prompt.prompt_template import OutLinePromptTemplate
from events.message_event import message_was_created
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.dataset import DatasetQuery
from models.model import AppModelConfig, Conversation, Account, Message, EndUser, App, MessageAgentThought, MessageChain
from models.provider import ProviderType, Provider
class ConversationMessageTask:
def __init__(self, task_id: str, app: App, app_model_config: AppModelConfig, user: Account,
inputs: dict, query: str, streaming: bool,
conversation: Optional[Conversation] = None, is_override: bool = False):
self.task_id = task_id
self.app = app
self.tenant_id = app.tenant_id
self.app_model_config = app_model_config
self.is_override = is_override
self.user = user
self.inputs = inputs
self.query = query
self.streaming = streaming
self.conversation = conversation
self.is_new_conversation = False
self.message = None
self.model_dict = self.app_model_config.model_dict
self.model_name = self.model_dict.get('name')
self.mode = app.mode
self.init()
self._pub_handler = PubHandler(
user=self.user,
task_id=self.task_id,
message=self.message,
conversation=self.conversation,
chain_pub=False, # disabled currently
agent_thought_pub=False # disabled currently
)
def init(self):
override_model_configs = None
if self.is_override:
override_model_configs = {
"model": self.app_model_config.model_dict,
"pre_prompt": self.app_model_config.pre_prompt,
"agent_mode": self.app_model_config.agent_mode_dict,
"opening_statement": self.app_model_config.opening_statement,
"suggested_questions": self.app_model_config.suggested_questions_list,
"suggested_questions_after_answer": self.app_model_config.suggested_questions_after_answer_dict,
"more_like_this": self.app_model_config.more_like_this_dict,
"user_input_form": self.app_model_config.user_input_form_list,
}
introduction = ''
system_instruction = ''
system_instruction_tokens = 0
if self.mode == 'chat':
introduction = self.app_model_config.opening_statement
if introduction:
prompt_template = OutLinePromptTemplate.from_template(template=PromptBuilder.process_template(introduction))
prompt_inputs = {k: self.inputs[k] for k in prompt_template.input_variables if k in self.inputs}
introduction = prompt_template.format(**prompt_inputs)
if self.app_model_config.pre_prompt:
pre_prompt = PromptBuilder.process_template(self.app_model_config.pre_prompt)
system_message = PromptBuilder.to_system_message(pre_prompt, self.inputs)
system_instruction = system_message.content
llm = LLMBuilder.to_llm(self.tenant_id, self.model_name)
system_instruction_tokens = llm.get_messages_tokens([system_message])
if not self.conversation:
self.is_new_conversation = True
self.conversation = Conversation(
app_id=self.app_model_config.app_id,
app_model_config_id=self.app_model_config.id,
model_provider=self.model_dict.get('provider'),
model_id=self.model_name,
override_model_configs=json.dumps(override_model_configs) if override_model_configs else None,
mode=self.mode,
name='',
inputs=self.inputs,
introduction=introduction,
system_instruction=system_instruction,
system_instruction_tokens=system_instruction_tokens,
status='normal',
from_source=('console' if isinstance(self.user, Account) else 'api'),
from_end_user_id=(self.user.id if isinstance(self.user, EndUser) else None),
from_account_id=(self.user.id if isinstance(self.user, Account) else None),
)
db.session.add(self.conversation)
db.session.flush()
self.message = Message(
app_id=self.app_model_config.app_id,
model_provider=self.model_dict.get('provider'),
model_id=self.model_name,
override_model_configs=json.dumps(override_model_configs) if override_model_configs else None,
conversation_id=self.conversation.id,
inputs=self.inputs,
query=self.query,
message="",
message_tokens=0,
message_unit_price=0,
answer="",
answer_tokens=0,
answer_unit_price=0,
provider_response_latency=0,
total_price=0,
currency=llm_constant.model_currency,
from_source=('console' if isinstance(self.user, Account) else 'api'),
from_end_user_id=(self.user.id if isinstance(self.user, EndUser) else None),
from_account_id=(self.user.id if isinstance(self.user, Account) else None),
agent_based=self.app_model_config.agent_mode_dict.get('enabled'),
)
db.session.add(self.message)
db.session.flush()
def append_message_text(self, text: str):
self._pub_handler.pub_text(text)
def save_message(self, llm_message: LLMMessage, by_stopped: bool = False):
model_name = self.app_model_config.model_dict.get('name')
message_tokens = llm_message.prompt_tokens
answer_tokens = llm_message.completion_tokens
message_unit_price = llm_constant.model_prices[model_name]['prompt']
answer_unit_price = llm_constant.model_prices[model_name]['completion']
total_price = self.calc_total_price(message_tokens, message_unit_price, answer_tokens, answer_unit_price)
self.message.message = llm_message.prompt
self.message.message_tokens = message_tokens
self.message.message_unit_price = message_unit_price
self.message.answer = llm_message.completion.strip() if llm_message.completion else ''
self.message.answer_tokens = answer_tokens
self.message.answer_unit_price = answer_unit_price
self.message.provider_response_latency = llm_message.latency
self.message.total_price = total_price
self.update_provider_quota()
db.session.commit()
message_was_created.send(
self.message,
conversation=self.conversation,
is_first_message=self.is_new_conversation
)
if not by_stopped:
self._pub_handler.pub_end()
def update_provider_quota(self):
llm_provider_service = LLMProviderService(
tenant_id=self.app.tenant_id,
provider_name=self.message.model_provider,
)
provider = llm_provider_service.get_provider_db_record()
if provider and provider.provider_type == ProviderType.SYSTEM.value:
db.session.query(Provider).filter(
Provider.tenant_id == self.app.tenant_id,
Provider.quota_limit > Provider.quota_used
).update({'quota_used': Provider.quota_used + 1})
def init_chain(self, chain_result: ChainResult):
message_chain = MessageChain(
message_id=self.message.id,
type=chain_result.type,
input=json.dumps(chain_result.prompt),
output=''
)
db.session.add(message_chain)
db.session.flush()
return message_chain
def on_chain_end(self, message_chain: MessageChain, chain_result: ChainResult):
message_chain.output = json.dumps(chain_result.completion)
self._pub_handler.pub_chain(message_chain)
def on_agent_end(self, message_chain: MessageChain, agent_model_name: str,
agent_loop: AgentLoop):
agent_message_unit_price = llm_constant.model_prices[agent_model_name]['prompt']
agent_answer_unit_price = llm_constant.model_prices[agent_model_name]['completion']
loop_message_tokens = agent_loop.prompt_tokens
loop_answer_tokens = agent_loop.completion_tokens
loop_total_price = self.calc_total_price(
loop_message_tokens,
agent_message_unit_price,
loop_answer_tokens,
agent_answer_unit_price
)
message_agent_loop = MessageAgentThought(
message_id=self.message.id,
message_chain_id=message_chain.id,
position=agent_loop.position,
thought=agent_loop.thought,
tool=agent_loop.tool_name,
tool_input=agent_loop.tool_input,
observation=agent_loop.tool_output,
tool_process_data='', # currently not support
message=agent_loop.prompt,
message_token=loop_message_tokens,
message_unit_price=agent_message_unit_price,
answer=agent_loop.completion,
answer_token=loop_answer_tokens,
answer_unit_price=agent_answer_unit_price,
latency=agent_loop.latency,
tokens=agent_loop.prompt_tokens + agent_loop.completion_tokens,
total_price=loop_total_price,
currency=llm_constant.model_currency,
created_by_role=('account' if isinstance(self.user, Account) else 'end_user'),
created_by=self.user.id
)
db.session.add(message_agent_loop)
db.session.flush()
self._pub_handler.pub_agent_thought(message_agent_loop)
def on_dataset_query_end(self, dataset_query_obj: DatasetQueryObj):
dataset_query = DatasetQuery(
dataset_id=dataset_query_obj.dataset_id,
content=dataset_query_obj.query,
source='app',
source_app_id=self.app.id,
created_by_role=('account' if isinstance(self.user, Account) else 'end_user'),
created_by=self.user.id
)
db.session.add(dataset_query)
def calc_total_price(self, message_tokens, message_unit_price, answer_tokens, answer_unit_price):
message_tokens_per_1k = (decimal.Decimal(message_tokens) / 1000).quantize(decimal.Decimal('0.001'),
rounding=decimal.ROUND_HALF_UP)
answer_tokens_per_1k = (decimal.Decimal(answer_tokens) / 1000).quantize(decimal.Decimal('0.001'),
rounding=decimal.ROUND_HALF_UP)
total_price = message_tokens_per_1k * message_unit_price + answer_tokens_per_1k * answer_unit_price
return total_price.quantize(decimal.Decimal('0.0000001'), rounding=decimal.ROUND_HALF_UP)
class PubHandler:
def __init__(self, user: Union[Account, EndUser], task_id: str,
message: Message, conversation: Conversation,
chain_pub: bool = False, agent_thought_pub: bool = False):
self._channel = PubHandler.generate_channel_name(user, task_id)
self._stopped_cache_key = PubHandler.generate_stopped_cache_key(user, task_id)
self._task_id = task_id
self._message = message
self._conversation = conversation
self._chain_pub = chain_pub
self._agent_thought_pub = agent_thought_pub
@classmethod
def generate_channel_name(cls, user: Union[Account, EndUser], task_id: str):
user_str = 'account-' + user.id if isinstance(user, Account) else 'end-user-' + user.id
return "generate_result:{}-{}".format(user_str, task_id)
@classmethod
def generate_stopped_cache_key(cls, user: Union[Account, EndUser], task_id: str):
user_str = 'account-' + user.id if isinstance(user, Account) else 'end-user-' + user.id
return "generate_result_stopped:{}-{}".format(user_str, task_id)
def pub_text(self, text: str):
content = {
'event': 'message',
'data': {
'task_id': self._task_id,
'message_id': self._message.id,
'text': text,
'mode': self._conversation.mode,
'conversation_id': self._conversation.id
}
}
redis_client.publish(self._channel, json.dumps(content))
if self._is_stopped():
self.pub_end()
raise ConversationTaskStoppedException()
def pub_chain(self, message_chain: MessageChain):
if self._chain_pub:
content = {
'event': 'chain',
'data': {
'task_id': self._task_id,
'message_id': self._message.id,
'chain_id': message_chain.id,
'type': message_chain.type,
'input': json.loads(message_chain.input),
'output': json.loads(message_chain.output),
'mode': self._conversation.mode,
'conversation_id': self._conversation.id
}
}
redis_client.publish(self._channel, json.dumps(content))
if self._is_stopped():
self.pub_end()
raise ConversationTaskStoppedException()
def pub_agent_thought(self, message_agent_thought: MessageAgentThought):
if self._agent_thought_pub:
content = {
'event': 'agent_thought',
'data': {
'task_id': self._task_id,
'message_id': self._message.id,
'chain_id': message_agent_thought.message_chain_id,
'agent_thought_id': message_agent_thought.id,
'position': message_agent_thought.position,
'thought': message_agent_thought.thought,
'tool': message_agent_thought.tool,
'tool_input': message_agent_thought.tool_input,
'observation': message_agent_thought.observation,
'answer': message_agent_thought.answer,
'mode': self._conversation.mode,
'conversation_id': self._conversation.id
}
}
redis_client.publish(self._channel, json.dumps(content))
if self._is_stopped():
self.pub_end()
raise ConversationTaskStoppedException()
def pub_end(self):
content = {
'event': 'end',
}
redis_client.publish(self._channel, json.dumps(content))
@classmethod
def pub_error(cls, user: Union[Account, EndUser], task_id: str, e):
content = {
'error': type(e).__name__,
'description': e.description if getattr(e, 'description', None) is not None else str(e)
}
channel = cls.generate_channel_name(user, task_id)
redis_client.publish(channel, json.dumps(content))
def _is_stopped(self):
return redis_client.get(self._stopped_cache_key) is not None
@classmethod
def stop(cls, user: Union[Account, EndUser], task_id: str):
stopped_cache_key = cls.generate_stopped_cache_key(user, task_id)
redis_client.setex(stopped_cache_key, 600, 1)
class ConversationTaskStoppedException(Exception):
pass
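
For context, a hypothetical consumer of the events PubHandler publishes, using redis-py's pub/sub API; the channel-name placeholders follow generate_channel_name above, and error payloads (which carry no 'event' key) are omitted for brevity:

import json
import redis

r = redis.Redis()
channel = "generate_result:account-<account_id>-<task_id>"  # see generate_channel_name

pubsub = r.pubsub()
pubsub.subscribe(channel)
for item in pubsub.listen():
    if item['type'] != 'message':
        continue
    event = json.loads(item['data'])
    if event.get('event') == 'message':
        print(event['data']['text'], end='')  # streamed text chunk
    elif event.get('event') == 'end':
        break                                 # generation finished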

View File

@ -0,0 +1,190 @@
from typing import Any, Dict, Optional, Sequence
from llama_index.data_structs import Node
from llama_index.docstore.types import BaseDocumentStore
from llama_index.docstore.utils import json_to_doc
from llama_index.schema import BaseDocument
from sqlalchemy import func
from core.llm.token_calculator import TokenCalculator
from extensions.ext_database import db
from models.dataset import Dataset, DocumentSegment
class DatesetDocumentStore(BaseDocumentStore):
def __init__(
self,
dataset: Dataset,
user_id: str,
embedding_model_name: str,
document_id: Optional[str] = None,
):
self._dataset = dataset
self._user_id = user_id
self._embedding_model_name = embedding_model_name
self._document_id = document_id
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "DatesetDocumentStore":
return cls(**config_dict)
def to_dict(self) -> Dict[str, Any]:
"""Serialize to dict."""
return {
"dataset_id": self._dataset.id,
}
@property
def dateset_id(self) -> Any:
return self._dataset.id
@property
def user_id(self) -> Any:
return self._user_id
@property
def embedding_model_name(self) -> Any:
return self._embedding_model_name
@property
def docs(self) -> Dict[str, BaseDocument]:
document_segments = db.session.query(DocumentSegment).filter(
DocumentSegment.dataset_id == self._dataset.id
).all()
output = {}
for document_segment in document_segments:
doc_id = document_segment.index_node_id
result = self.segment_to_dict(document_segment)
output[doc_id] = json_to_doc(result)
return output
def add_documents(
self, docs: Sequence[BaseDocument], allow_update: bool = True
) -> None:
max_position = db.session.query(func.max(DocumentSegment.position)).filter(
DocumentSegment.document_id == self._document_id
).scalar()
if max_position is None:
max_position = 0
for doc in docs:
if doc.is_doc_id_none:
raise ValueError("doc_id not set")
if not isinstance(doc, Node):
raise ValueError("doc must be a Node")
segment_document = self.get_document(doc_id=doc.get_doc_id(), raise_error=False)
# NOTE: doc could already exist in the store, but we overwrite it
if not allow_update and segment_document:
raise ValueError(
f"doc_id {doc.get_doc_id()} already exists. "
"Set allow_update to True to overwrite."
)
# calc embedding use tokens
tokens = TokenCalculator.get_num_tokens(self._embedding_model_name, doc.get_text())
if not segment_document:
max_position += 1
segment_document = DocumentSegment(
tenant_id=self._dataset.tenant_id,
dataset_id=self._dataset.id,
document_id=self._document_id,
index_node_id=doc.get_doc_id(),
index_node_hash=doc.get_doc_hash(),
position=max_position,
content=doc.get_text(),
word_count=len(doc.get_text()),
tokens=tokens,
created_by=self._user_id,
)
db.session.add(segment_document)
else:
segment_document.content = doc.get_text()
segment_document.index_node_hash = doc.get_doc_hash()
segment_document.word_count = len(doc.get_text())
segment_document.tokens = tokens
db.session.commit()
def document_exists(self, doc_id: str) -> bool:
"""Check if document exists."""
result = self.get_document_segment(doc_id)
return result is not None
def get_document(
self, doc_id: str, raise_error: bool = True
) -> Optional[BaseDocument]:
document_segment = self.get_document_segment(doc_id)
if document_segment is None:
if raise_error:
raise ValueError(f"doc_id {doc_id} not found.")
else:
return None
result = self.segment_to_dict(document_segment)
return json_to_doc(result)
def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
document_segment = self.get_document_segment(doc_id)
if document_segment is None:
if raise_error:
raise ValueError(f"doc_id {doc_id} not found.")
else:
return None
db.session.delete(document_segment)
db.session.commit()
def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
"""Set the hash for a given doc_id."""
document_segment = self.get_document_segment(doc_id)
if document_segment is None:
return None
document_segment.index_node_hash = doc_hash
db.session.commit()
def get_document_hash(self, doc_id: str) -> Optional[str]:
"""Get the stored hash for a document, if it exists."""
document_segment = self.get_document_segment(doc_id)
if document_segment is None:
return None
return document_segment.index_node_hash
def update_docstore(self, other: "BaseDocumentStore") -> None:
"""Update docstore.
Args:
other (BaseDocumentStore): docstore to update from
"""
self.add_documents(list(other.docs.values()))
def get_document_segment(self, doc_id: str) -> DocumentSegment:
document_segment = db.session.query(DocumentSegment).filter(
DocumentSegment.dataset_id == self._dataset.id,
DocumentSegment.index_node_id == doc_id
).first()
return document_segment
def segment_to_dict(self, segment: DocumentSegment) -> Dict[str, Any]:
return {
"doc_id": segment.index_node_id,
"doc_hash": segment.index_node_hash,
"text": segment.content,
"__type__": Node.get_type()
}
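
An illustrative sketch of adding a llama_index Node through this store; dataset, document, and current_user_id are assumed to exist, and a real call also needs an application context for the database session:

from llama_index.data_structs import Node

store = DatesetDocumentStore(
    dataset=dataset,                                # a models.dataset.Dataset row
    user_id=current_user_id,
    embedding_model_name='text-embedding-ada-002',
    document_id=document.id,
)
node = Node(text="Example segment content")
store.add_documents([node])                         # persists a DocumentSegment row
assert store.document_exists(node.get_doc_id())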

View File

@ -0,0 +1,51 @@
from typing import Any, Dict, Optional, Sequence
from llama_index.docstore.types import BaseDocumentStore
from llama_index.schema import BaseDocument
class EmptyDocumentStore(BaseDocumentStore):
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "EmptyDocumentStore":
return cls()
def to_dict(self) -> Dict[str, Any]:
"""Serialize to dict."""
return {}
@property
def docs(self) -> Dict[str, BaseDocument]:
return {}
def add_documents(
self, docs: Sequence[BaseDocument], allow_update: bool = True
) -> None:
pass
def document_exists(self, doc_id: str) -> bool:
"""Check if document exists."""
return False
def get_document(
self, doc_id: str, raise_error: bool = True
) -> Optional[BaseDocument]:
return None
def delete_document(self, doc_id: str, raise_error: bool = True) -> None:
pass
def set_document_hash(self, doc_id: str, doc_hash: str) -> None:
"""Set the hash for a given doc_id."""
pass
def get_document_hash(self, doc_id: str) -> Optional[str]:
"""Get the stored hash for a document, if it exists."""
return None
def update_docstore(self, other: "BaseDocumentStore") -> None:
"""Update docstore.
Args:
other (BaseDocumentStore): docstore to update from
"""
self.add_documents(list(other.docs.values()))

176
api/core/embedding/openai_embedding.py Normal file
View File

@ -0,0 +1,176 @@
from typing import Optional, Any, List
import openai
from llama_index.embeddings.base import BaseEmbedding
from llama_index.embeddings.openai import OpenAIEmbeddingMode, OpenAIEmbeddingModelType, _QUERY_MODE_MODEL_DICT, \
_TEXT_MODE_MODEL_DICT
from tenacity import wait_random_exponential, retry, stop_after_attempt
from core.llm.error_handle_wraps import handle_llm_exceptions, handle_llm_exceptions_async
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embedding(
text: str,
engine: Optional[str] = None,
openai_api_key: Optional[str] = None,
) -> List[float]:
"""Get embedding.
NOTE: Copied from OpenAI's embedding utils:
https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py
Copied here to avoid importing unnecessary dependencies
like matplotlib, plotly, scipy, sklearn.
"""
text = text.replace("\n", " ")
return openai.Embedding.create(input=[text], engine=engine, api_key=openai_api_key)["data"][0]["embedding"]
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embedding(text: str, engine: Optional[str] = None, openai_api_key: Optional[str] = None) -> List[float]:
"""Asynchronously get embedding.
NOTE: Copied from OpenAI's embedding utils:
https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py
Copied here to avoid importing unnecessary dependencies
like matplotlib, plotly, scipy, sklearn.
"""
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
return (await openai.Embedding.acreate(input=[text], engine=engine, api_key=openai_api_key))["data"][0][
"embedding"
]
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
def get_embeddings(
list_of_text: List[str],
engine: Optional[str] = None,
openai_api_key: Optional[str] = None
) -> List[List[float]]:
"""Get embeddings.
NOTE: Copied from OpenAI's embedding utils:
https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py
Copied here to avoid importing unnecessary dependencies
like matplotlib, plotly, scipy, sklearn.
"""
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
# replace newlines, which can negatively affect performance.
list_of_text = [text.replace("\n", " ") for text in list_of_text]
data = openai.Embedding.create(input=list_of_text, engine=engine, api_key=openai_api_key).data
data = sorted(data, key=lambda x: x["index"]) # maintain the same order as input.
return [d["embedding"] for d in data]
@retry(reraise=True, wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6))
async def aget_embeddings(
list_of_text: List[str], engine: Optional[str] = None, openai_api_key: Optional[str] = None
) -> List[List[float]]:
"""Asynchronously get embeddings.
NOTE: Copied from OpenAI's embedding utils:
https://github.com/openai/openai-python/blob/main/openai/embeddings_utils.py
Copied here to avoid importing unnecessary dependencies
like matplotlib, plotly, scipy, sklearn.
"""
assert len(list_of_text) <= 2048, "The batch size should not be larger than 2048."
# replace newlines, which can negatively affect performance.
list_of_text = [text.replace("\n", " ") for text in list_of_text]
data = (await openai.Embedding.acreate(input=list_of_text, engine=engine, api_key=openai_api_key)).data
data = sorted(data, key=lambda x: x["index"]) # maintain the same order as input.
return [d["embedding"] for d in data]
class OpenAIEmbedding(BaseEmbedding):
def __init__(
self,
mode: str = OpenAIEmbeddingMode.TEXT_SEARCH_MODE,
model: str = OpenAIEmbeddingModelType.TEXT_EMBED_ADA_002,
deployment_name: Optional[str] = None,
openai_api_key: Optional[str] = None,
**kwargs: Any,
) -> None:
"""Init params."""
super().__init__(**kwargs)
self.mode = OpenAIEmbeddingMode(mode)
self.model = OpenAIEmbeddingModelType(model)
self.deployment_name = deployment_name
self.openai_api_key = openai_api_key
@handle_llm_exceptions
def _get_query_embedding(self, query: str) -> List[float]:
"""Get query embedding."""
if self.deployment_name is not None:
engine = self.deployment_name
else:
key = (self.mode, self.model)
if key not in _QUERY_MODE_MODEL_DICT:
raise ValueError(f"Invalid mode, model combination: {key}")
engine = _QUERY_MODE_MODEL_DICT[key]
return get_embedding(query, engine=engine, openai_api_key=self.openai_api_key)
def _get_text_embedding(self, text: str) -> List[float]:
"""Get text embedding."""
if self.deployment_name is not None:
engine = self.deployment_name
else:
key = (self.mode, self.model)
if key not in _TEXT_MODE_MODEL_DICT:
raise ValueError(f"Invalid mode, model combination: {key}")
engine = _TEXT_MODE_MODEL_DICT[key]
return get_embedding(text, engine=engine, openai_api_key=self.openai_api_key)
async def _aget_text_embedding(self, text: str) -> List[float]:
"""Asynchronously get text embedding."""
if self.deployment_name is not None:
engine = self.deployment_name
else:
key = (self.mode, self.model)
if key not in _TEXT_MODE_MODEL_DICT:
raise ValueError(f"Invalid mode, model combination: {key}")
engine = _TEXT_MODE_MODEL_DICT[key]
return await aget_embedding(text, engine=engine, openai_api_key=self.openai_api_key)
def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""Get text embeddings.
By default, this is a wrapper around _get_text_embedding.
Can be overridden for batch queries.
"""
if self.deployment_name is not None:
engine = self.deployment_name
else:
key = (self.mode, self.model)
if key not in _TEXT_MODE_MODEL_DICT:
raise ValueError(f"Invalid mode, model combination: {key}")
engine = _TEXT_MODE_MODEL_DICT[key]
embeddings = get_embeddings(texts, engine=engine, openai_api_key=self.openai_api_key)
return embeddings
async def _aget_text_embeddings(self, texts: List[str]) -> List[List[float]]:
"""Asynchronously get text embeddings."""
if self.deployment_name is not None:
engine = self.deployment_name
else:
key = (self.mode, self.model)
if key not in _TEXT_MODE_MODEL_DICT:
raise ValueError(f"Invalid mode, model combination: {key}")
engine = _TEXT_MODE_MODEL_DICT[key]
embeddings = await aget_embeddings(texts, engine=engine, openai_api_key=self.openai_api_key)
return embeddings
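
A minimal sketch of calling this wrapper directly, assuming a valid OpenAI API key and network access; get_text_embedding is the public entry point inherited from llama_index's BaseEmbedding:

embedder = OpenAIEmbedding(openai_api_key="sk-...")  # placeholder key
vector = embedder.get_text_embedding("hello world")
print(len(vector))                                   # 1536 for text-embedding-ada-002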

View File

@ -0,0 +1,120 @@
import logging
from langchain.chat_models.base import BaseChatModel
from langchain.schema import HumanMessage
from core.constant import llm_constant
from core.llm.llm_builder import LLMBuilder
from core.llm.streamable_open_ai import StreamableOpenAI
from core.llm.token_calculator import TokenCalculator
from core.prompt.output_parser.suggested_questions_after_answer import SuggestedQuestionsAfterAnswerOutputParser
from core.prompt.prompt_template import OutLinePromptTemplate
from core.prompt.prompts import CONVERSATION_TITLE_PROMPT, CONVERSATION_SUMMARY_PROMPT, INTRODUCTION_GENERATE_PROMPT
# gpt-3.5-turbo does not work well for these generation tasks
generate_base_model = 'text-davinci-003'
class LLMGenerator:
@classmethod
def generate_conversation_name(cls, tenant_id: str, query, answer):
prompt = CONVERSATION_TITLE_PROMPT
prompt = prompt.format(query=query, answer=answer)
llm: StreamableOpenAI = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name=generate_base_model,
max_tokens=50
)
if isinstance(llm, BaseChatModel):
prompt = [HumanMessage(content=prompt)]
response = llm.generate([prompt])
answer = response.generations[0][0].text
return answer.strip()
@classmethod
def generate_conversation_summary(cls, tenant_id: str, messages):
max_tokens = 200
prompt = CONVERSATION_SUMMARY_PROMPT
prompt_with_empty_context = prompt.format(context='')
prompt_tokens = TokenCalculator.get_num_tokens(generate_base_model, prompt_with_empty_context)
rest_tokens = llm_constant.max_context_token_length[generate_base_model] - prompt_tokens - max_tokens
context = ''
for message in messages:
if not message.answer:
continue
message_qa_text = "Human:" + message.query + "\nAI:" + message.answer + "\n"
if rest_tokens - TokenCalculator.get_num_tokens(generate_base_model, context + message_qa_text) > 0:
context += message_qa_text
prompt = prompt.format(context=context)
llm: StreamableOpenAI = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name=generate_base_model,
max_tokens=max_tokens
)
if isinstance(llm, BaseChatModel):
prompt = [HumanMessage(content=prompt)]
response = llm.generate([prompt])
answer = response.generations[0][0].text
return answer.strip()
@classmethod
def generate_introduction(cls, tenant_id: str, pre_prompt: str):
prompt = INTRODUCTION_GENERATE_PROMPT
prompt = prompt.format(prompt=pre_prompt)
llm: StreamableOpenAI = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name=generate_base_model,
)
if isinstance(llm, BaseChatModel):
prompt = [HumanMessage(content=prompt)]
response = llm.generate([prompt])
answer = response.generations[0][0].text
return answer.strip()
@classmethod
def generate_suggested_questions_after_answer(cls, tenant_id: str, histories: str):
output_parser = SuggestedQuestionsAfterAnswerOutputParser()
format_instructions = output_parser.get_format_instructions()
prompt = OutLinePromptTemplate(
template="{histories}\n{format_instructions}\nquestions:\n",
input_variables=["histories"],
partial_variables={"format_instructions": format_instructions}
)
_input = prompt.format_prompt(histories=histories)
llm: StreamableOpenAI = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name=generate_base_model,
temperature=0,
max_tokens=256
)
if isinstance(llm, BaseChatModel):
query = [HumanMessage(content=_input.to_string())]
else:
query = _input.to_string()
try:
output = llm(query)
questions = output_parser.parse(output)
except Exception:
logging.exception("Error generating suggested questions after answer")
questions = []
return questions
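
A hypothetical call into the generator; it requires a tenant with a configured OpenAI provider, and the printed result is illustrative:

name = LLMGenerator.generate_conversation_name(
    tenant_id=tenant.id,  # assumed tenant record
    query="What is Dify?",
    answer="Dify is an LLM application development platform.",
)
print(name)  # e.g. "Dify introduction"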

View File

@ -0,0 +1,45 @@
from langchain.callbacks import CallbackManager
from llama_index import ServiceContext, PromptHelper, LLMPredictor
from core.callback_handler.std_out_callback_handler import DifyStdOutCallbackHandler
from core.embedding.openai_embedding import OpenAIEmbedding
from core.llm.llm_builder import LLMBuilder
class IndexBuilder:
@classmethod
def get_default_service_context(cls, tenant_id: str) -> ServiceContext:
# set number of output tokens
num_output = 512
# only for verbose
callback_manager = CallbackManager([DifyStdOutCallbackHandler()])
llm = LLMBuilder.to_llm(
tenant_id=tenant_id,
model_name='text-davinci-003',
temperature=0,
max_tokens=num_output,
callback_manager=callback_manager,
)
llm_predictor = LLMPredictor(llm=llm)
# These parameters affect how the final synthesized response is segmented.
# The number of refinement iterations during synthesis depends on
# whether the segmented output length exceeds max_input_size.
prompt_helper = PromptHelper(
max_input_size=3500,
num_output=num_output,
max_chunk_overlap=20
)
model_credentials = LLMBuilder.get_model_credentials(
tenant_id=tenant_id,
model_name='text-embedding-ada-002'
)
return ServiceContext.from_defaults(
llm_predictor=llm_predictor,
prompt_helper=prompt_helper,
embed_model=OpenAIEmbedding(**model_credentials),
)
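
A sketch of how this service context would typically be consumed; GPTSimpleVectorIndex is one of the llama_index index types from this era, and documents is assumed to be a pre-loaded list:

from llama_index import GPTSimpleVectorIndex

service_context = IndexBuilder.get_default_service_context(tenant_id=tenant.id)
# build a vector index over the documents with Dify's LLM and embedding setup
index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)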

View File

@ -0,0 +1,159 @@
import re
from typing import (
Any,
Dict,
List,
Set,
Optional
)
import jieba.analyse
from core.index.keyword_table.stopwords import STOPWORDS
from llama_index.indices.query.base import IS
from llama_index import QueryMode
from llama_index.indices.base import QueryMap
from llama_index.indices.keyword_table.base import BaseGPTKeywordTableIndex
from llama_index.indices.keyword_table.query import BaseGPTKeywordTableQuery
from llama_index.docstore import BaseDocumentStore
from llama_index.indices.postprocessor.node import (
BaseNodePostprocessor,
)
from llama_index.indices.response.response_builder import ResponseMode
from llama_index.indices.service_context import ServiceContext
from llama_index.optimization.optimizer import BaseTokenUsageOptimizer
from llama_index.prompts.prompts import (
QuestionAnswerPrompt,
RefinePrompt,
SimpleInputPrompt,
)
from core.index.query.synthesizer import EnhanceResponseSynthesizer
def jieba_extract_keywords(
text_chunk: str,
max_keywords: Optional[int] = None,
expand_with_subtokens: bool = True,
) -> Set[str]:
"""Extract keywords with JIEBA tfidf."""
keywords = jieba.analyse.extract_tags(
sentence=text_chunk,
topK=max_keywords,
)
if expand_with_subtokens:
return set(expand_tokens_with_subtokens(keywords))
else:
return set(keywords)
def expand_tokens_with_subtokens(tokens: Set[str]) -> Set[str]:
    """Get subtokens from a set of tokens, filtering out stopwords."""
    results = set()
    for token in tokens:
        results.add(token)
        sub_tokens = re.findall(r"\w+", token)
        if len(sub_tokens) > 1:
            # test membership against the STOPWORDS set directly (no list conversion)
            results.update({w for w in sub_tokens if w not in STOPWORDS})
    return results
class GPTJIEBAKeywordTableIndex(BaseGPTKeywordTableIndex):
"""GPT JIEBA Keyword Table Index.
This index uses a JIEBA keyword extractor to extract keywords from the text.
"""
def _extract_keywords(self, text: str) -> Set[str]:
"""Extract keywords from text."""
return jieba_extract_keywords(text, max_keywords=self.max_keywords_per_chunk)
@classmethod
def get_query_map(cls) -> QueryMap:
"""Get query map."""
super_map = super().get_query_map()
super_map[QueryMode.DEFAULT] = GPTKeywordTableJIEBAQuery
return super_map
def _delete(self, doc_id: str, **delete_kwargs: Any) -> None:
"""Delete a document."""
# get set of ids that correspond to node
node_idxs_to_delete = {doc_id}
# delete node_idxs from keyword to node idxs mapping
keywords_to_delete = set()
for keyword, node_idxs in self._index_struct.table.items():
if node_idxs_to_delete.intersection(node_idxs):
self._index_struct.table[keyword] = node_idxs.difference(
node_idxs_to_delete
)
if not self._index_struct.table[keyword]:
keywords_to_delete.add(keyword)
for keyword in keywords_to_delete:
del self._index_struct.table[keyword]
class GPTKeywordTableJIEBAQuery(BaseGPTKeywordTableQuery):
"""GPT Keyword Table Index JIEBA Query.
Extracts keywords using JIEBA keyword extractor.
Registered as the default query mode of `GPTJIEBAKeywordTableIndex` (see `get_query_map`).
.. code-block:: python
    response = index.query("<query_str>")
See BaseGPTKeywordTableQuery for arguments.
"""
@classmethod
def from_args(
cls,
index_struct: IS,
service_context: ServiceContext,
docstore: Optional[BaseDocumentStore] = None,
node_postprocessors: Optional[List[BaseNodePostprocessor]] = None,
verbose: bool = False,
# response synthesizer args
response_mode: ResponseMode = ResponseMode.DEFAULT,
text_qa_template: Optional[QuestionAnswerPrompt] = None,
refine_template: Optional[RefinePrompt] = None,
simple_template: Optional[SimpleInputPrompt] = None,
response_kwargs: Optional[Dict] = None,
use_async: bool = False,
streaming: bool = False,
optimizer: Optional[BaseTokenUsageOptimizer] = None,
# class-specific args
**kwargs: Any,
) -> "BaseGPTIndexQuery":
response_synthesizer = EnhanceResponseSynthesizer.from_args(
service_context=service_context,
text_qa_template=text_qa_template,
refine_template=refine_template,
simple_template=simple_template,
response_mode=response_mode,
response_kwargs=response_kwargs,
use_async=use_async,
streaming=streaming,
optimizer=optimizer,
)
return cls(
index_struct=index_struct,
service_context=service_context,
response_synthesizer=response_synthesizer,
docstore=docstore,
node_postprocessors=node_postprocessors,
verbose=verbose,
**kwargs,
)
def _get_keywords(self, query_str: str) -> List[str]:
"""Extract keywords."""
return list(
jieba_extract_keywords(query_str, max_keywords=self.max_keywords_per_query)
)
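
The keyword extraction itself can be exercised standalone; jieba ships its own TF-IDF dictionary, so this runs without the index (the printed keywords are illustrative):

import jieba.analyse

keywords = jieba.analyse.extract_tags("自然语言处理是人工智能的一个重要方向", topK=5)
print(keywords)  # e.g. ['自然语言', '人工智能', '方向', '处理', '重要']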

90
api/core/index/keyword_table/stopwords.py Normal file
View File

@ -0,0 +1,90 @@
STOPWORDS = {
"during", "when", "but", "then", "further", "isn", "mustn't", "until", "own", "i", "couldn", "y", "only", "you've",
"ours", "who", "where", "ourselves", "has", "to", "was", "didn't", "themselves", "if", "against", "through", "her",
"an", "your", "can", "those", "didn", "about", "aren't", "shan't", "be", "not", "these", "again", "so", "t",
"theirs", "weren", "won't", "won", "itself", "just", "same", "while", "why", "doesn", "aren", "him", "haven",
"for", "you'll", "that", "we", "am", "d", "by", "having", "wasn't", "than", "weren't", "out", "from", "now",
"their", "too", "hadn", "o", "needn", "most", "it", "under", "needn't", "any", "some", "few", "ll", "hers", "which",
"m", "you're", "off", "other", "had", "she", "you'd", "do", "you", "does", "s", "will", "each", "wouldn't", "hasn't",
"such", "more", "whom", "she's", "my", "yours", "yourself", "of", "on", "very", "hadn't", "with", "yourselves",
"been", "ma", "them", "mightn't", "shan", "mustn", "they", "what", "both", "that'll", "how", "is", "he", "because",
"down", "haven't", "are", "no", "it's", "our", "being", "the", "or", "above", "myself", "once", "don't", "doesn't",
"as", "nor", "here", "herself", "hasn", "mightn", "have", "its", "all", "were", "ain", "this", "at", "after",
"over", "shouldn't", "into", "before", "don", "wouldn", "re", "couldn't", "wasn", "in", "should", "there",
"himself", "isn't", "should've", "doing", "ve", "shouldn", "a", "did", "and", "his", "between", "me", "up", "below",
"人民", "末##末", "", "", "", "哎呀", "哎哟", "", "", "俺们", "", "按照", "", "吧哒", "", "罢了", "", "",
"本着", "", "比方", "比如", "鄙人", "", "彼此", "", "", "别的", "别说", "", "并且", "不比", "不成", "不单", "不但",
"不独", "不管", "不光", "不过", "不仅", "不拘", "不论", "不怕", "不然", "不如", "不特", "不惟", "不问", "不只", "", "朝着",
"", "趁着", "", "", "", "除此之外", "除非", "除了", "", "此间", "此外", "", "从而", "", "", "", "但是", "",
"当着", "", "", "", "的话", "", "等等", "", "", "叮咚", "", "对于", "", "多少", "", "而况", "而且", "而是",
"而外", "而言", "而已", "尔后", "反过来", "反过来说", "反之", "非但", "非徒", "否则", "", "嘎登", "", "", "", "",
"各个", "各位", "各种", "各自", "", "根据", "", "", "故此", "固然", "关于", "", "", "果然", "果真", "", "",
"哈哈", "", "", "", "何处", "何况", "何时", "", "", "哼唷", "呼哧", "", "", "还是", "还有", "换句话说", "换言之",
"", "或是", "或者", "极了", "", "及其", "及至", "", "即便", "即或", "即令", "即若", "即使", "", "几时", "", "",
"既然", "既是", "继而", "加之", "假如", "假若", "假使", "鉴于", "", "", "较之", "", "接着", "结果", "", "紧接着",
"进而", "", "尽管", "", "经过", "", "就是", "就是说", "", "具体地说", "具体说来", "开始", "开外", "", "", "",
"可见", "可是", "可以", "况且", "", "", "来着", "", "例如", "", "", "连同", "两者", "", "", "", "另外",
"另一方面", "", "", "", "慢说", "漫说", "", "", "", "每当", "", "莫若", "", "某个", "某些", "", "",
"哪边", "哪儿", "哪个", "哪里", "哪年", "哪怕", "哪天", "哪些", "哪样", "", "那边", "那儿", "那个", "那会儿", "那里", "那么",
"那么些", "那么样", "那时", "那些", "那样", "", "乃至", "", "", "", "你们", "", "", "宁可", "宁肯", "宁愿", "",
"", "啪达", "旁人", "", "", "凭借", "", "其次", "其二", "其他", "其它", "其一", "其余", "其中", "", "起见", "岂但",
"恰恰相反", "前后", "前者", "", "然而", "然后", "然则", "", "人家", "", "任何", "任凭", "", "如此", "如果", "如何",
"如其", "如若", "如上所述", "", "若非", "若是", "", "上下", "尚且", "设若", "设使", "甚而", "甚么", "甚至", "省得", "时候",
"什么", "什么样", "使得", "", "是的", "首先", "", "谁知", "", "顺着", "似的", "", "虽然", "虽说", "虽则", "", "随着",
"", "所以", "", "他们", "他人", "", "它们", "", "她们", "", "倘或", "倘然", "倘若", "倘使", "", "", "通过", "",
"同时", "", "万一", "", "", "", "为何", "为了", "为什么", "为着", "", "嗡嗡", "", "我们", "", "呜呼", "乌乎",
"无论", "无宁", "毋宁", "", "", "相对而言", "", "", "向着", "", "", "", "沿", "沿着", "", "要不", "要不然",
"要不是", "要么", "要是", "", "也罢", "也好", "", "一般", "一旦", "一方面", "一来", "一切", "一样", "一则", "", "依照",
"", "", "以便", "以及", "以免", "以至", "以至于", "以致", "抑或", "", "因此", "因而", "因为", "", "", "",
"由此可见", "由于", "", "有的", "有关", "有些", "", "", "于是", "于是乎", "", "与此同时", "与否", "与其", "越是",
"云云", "", "再说", "再者", "", "在下", "", "咱们", "", "", "怎么", "怎么办", "怎么样", "怎样", "", "", "照着",
"", "", "这边", "这儿", "这个", "这会儿", "这就是说", "这里", "这么", "这么点儿", "这么些", "这么样", "这时", "这些", "这样",
"正如", "", "", "之类", "之所以", "之一", "只是", "只限", "只要", "只有", "", "至于", "诸位", "", "着呢", "", "自从",
"自个儿", "自各儿", "自己", "自家", "自身", "综上所述", "总的来看", "总的来说", "总的说来", "总而言之", "总之", "", "纵令",
"纵然", "纵使", "遵照", "作为", "", "", "", "", "", "", "", "喔唷", "", "", "", "~", "!", ".", ":",
"\"", "'", "(", ")", "*", "A", "", "社会主义", "--", "..", ">>", " [", " ]", "", "<", ">", "/", "\\", "|", "-", "_",
"+", "=", "&", "^", "%", "#", "@", "`", ";", "$", "", "", "——", "", "", "·", "...", "", "", "", "", "",
" ", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "︿", "", "", "", "", "", "",
"", "", "", "啊哈", "啊呀", "啊哟", "挨次", "挨个", "挨家挨户", "挨门挨户", "挨门逐户", "挨着", "按理", "按期", "按时",
"按说", "暗地里", "暗中", "暗自", "昂然", "八成", "白白", "", "", "保管", "保险", "", "背地里", "背靠背", "倍感", "倍加",
"本人", "本身", "", "比起", "比如说", "比照", "毕竟", "", "必定", "必将", "必须", "便", "别人", "并非", "并肩", "并没",
"并没有", "并排", "并无", "勃然", "", "不必", "不常", "不大", "不但...而且", "不得", "不得不", "不得了", "不得已", "不迭",
"不定", "不对", "不妨", "不管怎样", "不会", "不仅...而且", "不仅仅", "不仅仅是", "不经意", "不可开交", "不可抗拒", "不力", "不了",
"不料", "不满", "不免", "不能不", "不起", "不巧", "不然的话", "不日", "不少", "不胜", "不时", "不是", "不同", "不能", "不要",
"不外", "不外乎", "不下", "不限", "不消", "不已", "不亦乐乎", "不由得", "不再", "不择手段", "不怎么", "不曾", "不知不觉", "不止",
"不止一次", "不至于", "", "才能", "策略地", "差不多", "差一点", "", "常常", "常言道", "常言说", "常言说得好", "长此下去",
"长话短说", "长期以来", "长线", "敞开儿", "彻夜", "陈年", "趁便", "趁机", "趁热", "趁势", "趁早", "成年", "成年累月", "成心",
"乘机", "乘胜", "乘势", "乘隙", "乘虚", "诚然", "迟早", "充分", "充其极", "充其量", "抽冷子", "", "", "", "出来", "出去",
"除此", "除此而外", "除此以外", "除开", "除去", "除却", "除外", "处处", "川流不息", "", "传说", "传闻", "串行", "", "纯粹",
"此后", "此中", "次第", "匆匆", "从不", "从此", "从此以后", "从古到今", "从古至今", "从今以后", "从宽", "从来", "从轻", "从速",
"从头", "从未", "从无到有", "从小", "从新", "从严", "从优", "从早到晚", "从中", "从重", "凑巧", "", "存心", "达旦", "打从",
"打开天窗说亮话", "", "大不了", "大大", "大抵", "大都", "大多", "大凡", "大概", "大家", "大举", "大略", "大面儿上", "大事",
"大体", "大体上", "大约", "大张旗鼓", "大致", "呆呆地", "", "", "待到", "", "单纯", "单单", "但愿", "弹指之间", "当场",
"当儿", "当即", "当口儿", "当然", "当庭", "当头", "当下", "当真", "当中", "倒不如", "倒不如说", "倒是", "到处", "到底", "到了儿",
"到目前为止", "到头", "到头来", "得起", "得天独厚", "的确", "等到", "叮当", "顶多", "", "动不动", "动辄", "陡然", "", "",
"独自", "断然", "顿时", "多次", "多多", "多多少少", "多多益善", "多亏", "多年来", "多年前", "而后", "而论", "而又", "尔等",
"二话不说", "二话没说", "反倒", "反倒是", "反而", "反手", "反之亦然", "反之则", "", "方才", "方能", "放量", "非常", "非得",
"分期", "分期分批", "分头", "奋勇", "愤然", "风雨无阻", "", "", "", "嘎嘎", "该当", "", "赶快", "赶早不赶晚", "",
"敢情", "敢于", "", "刚才", "刚好", "刚巧", "高低", "格外", "隔日", "隔夜", "个人", "各式", "", "更加", "更进一步", "更为",
"公然", "", "共总", "够瞧的", "姑且", "古来", "故而", "故意", "", "", "怪不得", "惯常", "", "光是", "归根到底",
"归根结底", "过于", "毫不", "毫无", "毫无保留地", "毫无例外", "好在", "何必", "何尝", "何妨", "何苦", "何乐而不为", "何须",
"何止", "", "很多", "很少", "轰然", "后来", "呼啦", "忽地", "忽然", "", "互相", "哗啦", "话说", "", "恍然", "", "豁然",
"", "伙同", "或多或少", "或许", "基本", "基本上", "基于", "", "极大", "极度", "极端", "极力", "极其", "极为", "急匆匆",
"即将", "即刻", "即是说", "几度", "几番", "几乎", "几经", "既...又", "继之", "加上", "加以", "间或", "简而言之", "简言之",
"简直", "", "将才", "将近", "将要", "交口", "较比", "较为", "接连不断", "接下来", "皆可", "截然", "截至", "藉以", "借此",
"借以", "届时", "", "仅仅", "", "进来", "进去", "", "近几年来", "近来", "近年来", "尽管如此", "尽可能", "尽快", "尽量",
"尽然", "尽如人意", "尽心竭力", "尽心尽力", "尽早", "精光", "经常", "", "竟然", "究竟", "就此", "就地", "就算", "居然", "局外",
"举凡", "据称", "据此", "据实", "据说", "据我所知", "据悉", "具体来说", "决不", "决非", "", "绝不", "绝顶", "绝对", "绝非",
"", "", "", "看来", "看起来", "看上去", "看样子", "可好", "可能", "恐怕", "", "快要", "来不及", "来得及", "来讲",
"来看", "拦腰", "牢牢", "", "老大", "老老实实", "老是", "累次", "累年", "理当", "理该", "理应", "", "", "立地", "立刻",
"立马", "立时", "联袂", "连连", "连日", "连日来", "连声", "连袂", "临到", "另方面", "另行", "另一个", "路经", "", "屡次",
"屡次三番", "屡屡", "缕缕", "率尔", "率然", "", "略加", "略微", "略为", "论说", "马上", "", "", "", "没有", "每逢",
"每每", "每时每刻", "猛然", "猛然间", "", "莫不", "莫非", "莫如", "默默地", "默然", "", "那末", "", "难道", "难得", "难怪",
"难说", "", "年复一年", "凝神", "偶而", "偶尔", "", "", "碰巧", "譬如", "偏偏", "", "平素", "", "迫于", "扑通",
"其后", "其实", "", "", "起初", "起来", "起首", "起头", "起先", "", "岂非", "岂止", "", "恰逢", "恰好", "恰恰", "恰巧",
"恰如", "恰似", "", "千万", "千万千万", "", "切不可", "切莫", "切切", "切勿", "", "亲口", "亲身", "亲手", "亲眼", "亲自",
"", "顷刻", "顷刻间", "顷刻之间", "请勿", "穷年累月", "取道", "", "权时", "全都", "全力", "全年", "全然", "全身心", "",
"人人", "", "仍旧", "仍然", "日复一日", "日见", "日渐", "日益", "日臻", "如常", "如此等等", "如次", "如今", "如期", "如前所述",
"如上", "如下", "", "三番两次", "三番五次", "三天两头", "瑟瑟", "沙沙", "", "上来", "上去", "一个", "", "", "\n"
}

Some files were not shown because too many files have changed in this diff.