Skip to content

Commit 94c9cad

Browse files
wy96f and root authored
fix image files not deleted on indexing_estimate #9541 (#10798)
Co-authored-by: root <[email protected]>
1 parent 2ae6460 commit 94c9cad

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

api/core/indexing_runner.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
)
3131
from core.rag.splitter.text_splitter import TextSplitter
3232
from core.tools.utils.text_processing_utils import remove_leading_symbols
33+
from core.tools.utils.web_reader_tool import get_image_upload_file_ids
3334
from extensions.ext_database import db
3435
from extensions.ext_redis import redis_client
3536
from extensions.ext_storage import storage
@@ -279,6 +280,19 @@ def indexing_estimate(
279280
if len(preview_texts) < 5:
280281
preview_texts.append(document.page_content)
281282

283+
# delete image files and related db records
284+
image_upload_file_ids = get_image_upload_file_ids(document.page_content)
285+
for upload_file_id in image_upload_file_ids:
286+
image_file = db.session.query(UploadFile).filter(UploadFile.id == upload_file_id).first()
287+
try:
288+
storage.delete(image_file.key)
289+
except Exception:
290+
logging.exception(
291+
"Delete image_files failed while indexing_estimate, \
292+
image_upload_file_is: {}".format(upload_file_id)
293+
)
294+
db.session.delete(image_file)
295+
282296
if doc_form and doc_form == "qa_model":
283297
if len(preview_texts) > 0:
284298
# qa model document

api/tasks/clean_dataset_task.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def clean_dataset_task(
7878
"Delete image_files failed when storage deleted, \
7979
image_upload_file_is: {}".format(upload_file_id)
8080
)
81+
db.session.delete(image_file)
8182
db.session.delete(segment)
8283

8384
db.session.query(DatasetProcessRule).filter(DatasetProcessRule.dataset_id == dataset_id).delete()

api/tasks/clean_document_task.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def clean_document_task(document_id: str, dataset_id: str, doc_form: str, file_i
5151
"Delete image_files failed when storage deleted, \
5252
image_upload_file_is: {}".format(upload_file_id)
5353
)
54+
db.session.delete(image_file)
5455
db.session.delete(segment)
5556

5657
db.session.commit()

0 commit comments

Comments
 (0)