fix the ssrf of docx file extractor external images (#10237)

This commit is contained in:
Jyong 2024-11-04 15:22:07 +08:00 committed by GitHub
parent 8ab05d4c36
commit 1024fc623e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -14,6 +14,7 @@ import requests
from docx import Document as DocxDocument
from configs import dify_config
from core.helper import ssrf_proxy
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from extensions.ext_database import db
@ -86,7 +87,7 @@ class WordExtractor(BaseExtractor):
image_count += 1
if rel.is_external:
url = rel.reltype
response = requests.get(url, stream=True)
response = ssrf_proxy.get(url, stream=True)
if response.status_code == 200:
image_ext = mimetypes.guess_extension(response.headers["Content-Type"])
file_uuid = str(uuid.uuid4())