This commit is contained in:
Yeuoly 2024-11-15 15:54:14 +08:00
parent a0543ab8fb
commit 6300e506fb
No known key found for this signature in database
GPG Key ID: A66E7E320FB19F61
5 changed files with 19 additions and 5 deletions

View File

@ -1,6 +1,5 @@
from typing import Literal, Optional
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator
from core.model_runtime.utils.encoders import jsonable_encoder

View File

@ -0,0 +1,17 @@
import re
def get_image_upload_file_ids(content: str) -> list[str]:
    """Extract upload-file ids from markdown image links in *content*.

    A link is recognised when it has the form ``![image](<url>)`` where
    ``<url>`` starts with ``http://`` or ``https://`` and ends with
    ``file-preview`` or ``image-preview``. The id is the path segment
    between ``files/`` and that trailing ``/file-preview`` or
    ``/image-preview``.

    Args:
        content: Text to scan. Empty or ``None`` content yields ``[]``.

    Returns:
        The extracted ids in order of appearance; duplicates are kept.
    """
    if not content:
        return []

    # ``https?`` accepts both schemes. The previous ``http?`` made the "p"
    # optional instead of the "s", so https links were never matched.
    pattern = r"!\[image\]\((https?://.*?(file-preview|image-preview))\)"

    image_upload_file_ids = []
    for url, preview_kind in re.findall(pattern, content):
        # preview_kind is one of the two literal alternatives captured above,
        # so it is safe to splice into the id-extraction pattern.
        id_match = re.search(rf"files/([^/]+)/{preview_kind}", url)
        if id_match:
            image_upload_file_ids.append(id_match.group(1))
    return image_upload_file_ids

View File

@ -1,8 +1,6 @@
from collections.abc import Generator, Mapping, Sequence
from os import path
from typing import Any, cast
from collections.abc import Mapping, Sequence
from typing import Any
from sqlalchemy import select
from sqlalchemy.orm import Session

View File

@ -5,7 +5,7 @@ import click
from celery import shared_task
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.tools.utils.web_reader_tool import get_image_upload_file_ids
from core.tools.utils.rag_web_reader import get_image_upload_file_ids
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.dataset import (

View File

@ -6,7 +6,7 @@ import click
from celery import shared_task
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
from core.tools.utils.web_reader_tool import get_image_upload_file_ids
from core.tools.utils.rag_web_reader import get_image_upload_file_ids
from extensions.ext_database import db
from extensions.ext_storage import storage
from models.dataset import Dataset, DocumentSegment