Fix the issue of decoding a non-UTF-8 encoded file using UTF-8 encodi… (#378)

This commit is contained in:
Columbus 2023-06-16 14:12:07 +08:00 committed by GitHub
parent 673288d58e
commit eeb2c28526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,6 +1,7 @@
import datetime
import hashlib
import tempfile
import chardet
import time
import uuid
from pathlib import Path
@ -141,7 +142,8 @@ class FilePreviewApi(Resource):
# ['txt', 'markdown', 'md']
with open(filepath, "rb") as fp:
data = fp.read()
text = data.decode(encoding='utf-8').strip() if data else ''
encoding = chardet.detect(data)['encoding']
text = data.decode(encoding=encoding).strip() if data else ''
text = text[0:PREVIEW_WORDS_LIMIT] if text else ''
return {'content': text}