Skip to content

Commit 3defd24

Browse files
authored
feat: allow updating chunk settings for the existing documents (#12833)
1 parent 9d86147 commit 3defd24

File tree

4 files changed

+24
-18
lines changed

4 files changed

+24
-18
lines changed

api/services/dataset_service.py

+16 -16
Original file line number | Diff line number | Diff line change
@@ -859,7 +859,7 @@ def save_document_with_dataset_id(
859859
position = DocumentService.get_documents_position(dataset.id)
860860
document_ids = []
861861
duplicate_document_ids = []
862-
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
862+
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
863863
upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
864864
for file_id in upload_file_list:
865865
file = (
@@ -901,7 +901,7 @@ def save_document_with_dataset_id(
901901
document = DocumentService.build_document(
902902
dataset,
903903
dataset_process_rule.id, # type: ignore
904-
knowledge_config.data_source.info_list.data_source_type,
904+
knowledge_config.data_source.info_list.data_source_type, # type: ignore
905905
knowledge_config.doc_form,
906906
knowledge_config.doc_language,
907907
data_source_info,
@@ -916,8 +916,8 @@ def save_document_with_dataset_id(
916916
document_ids.append(document.id)
917917
documents.append(document)
918918
position += 1
919-
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
920-
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
919+
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
920+
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
921921
if not notion_info_list:
922922
raise ValueError("No notion info list found.")
923923
exist_page_ids = []
@@ -956,7 +956,7 @@ def save_document_with_dataset_id(
956956
document = DocumentService.build_document(
957957
dataset,
958958
dataset_process_rule.id, # type: ignore
959-
knowledge_config.data_source.info_list.data_source_type,
959+
knowledge_config.data_source.info_list.data_source_type, # type: ignore
960960
knowledge_config.doc_form,
961961
knowledge_config.doc_language,
962962
data_source_info,
@@ -976,8 +976,8 @@ def save_document_with_dataset_id(
976976
# delete not selected documents
977977
if len(exist_document) > 0:
978978
clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
979-
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
980-
website_info = knowledge_config.data_source.info_list.website_info_list
979+
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
980+
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
981981
if not website_info:
982982
raise ValueError("No website info list found.")
983983
urls = website_info.urls
@@ -996,7 +996,7 @@ def save_document_with_dataset_id(
996996
document = DocumentService.build_document(
997997
dataset,
998998
dataset_process_rule.id, # type: ignore
999-
knowledge_config.data_source.info_list.data_source_type,
999+
knowledge_config.data_source.info_list.data_source_type, # type: ignore
10001000
knowledge_config.doc_form,
10011001
knowledge_config.doc_language,
10021002
data_source_info,
@@ -1195,20 +1195,20 @@ def save_document_without_dataset_id(tenant_id: str, knowledge_config: Knowledge
11951195

11961196
if features.billing.enabled:
11971197
count = 0
1198-
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
1198+
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
11991199
upload_file_list = (
1200-
knowledge_config.data_source.info_list.file_info_list.file_ids
1201-
if knowledge_config.data_source.info_list.file_info_list
1200+
knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
1201+
if knowledge_config.data_source.info_list.file_info_list # type: ignore
12021202
else []
12031203
)
12041204
count = len(upload_file_list)
1205-
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
1206-
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
1205+
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
1206+
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
12071207
if notion_info_list:
12081208
for notion_info in notion_info_list:
12091209
count = count + len(notion_info.pages)
1210-
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
1211-
website_info = knowledge_config.data_source.info_list.website_info_list
1210+
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
1211+
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
12121212
if website_info:
12131213
count = len(website_info.urls)
12141214
batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
@@ -1239,7 +1239,7 @@ def save_document_without_dataset_id(tenant_id: str, knowledge_config: Knowledge
12391239
dataset = Dataset(
12401240
tenant_id=tenant_id,
12411241
name="",
1242-
data_source_type=knowledge_config.data_source.info_list.data_source_type,
1242+
data_source_type=knowledge_config.data_source.info_list.data_source_type, # type: ignore
12431243
indexing_technique=knowledge_config.indexing_technique,
12441244
created_by=account.id,
12451245
embedding_model=knowledge_config.embedding_model,

api/services/entities/knowledge_entities/knowledge_entities.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
9797
original_document_id: Optional[str] = None
9898
duplicate: bool = True
9999
indexing_technique: Literal["high_quality", "economy"]
100-
data_source: DataSource
100+
data_source: Optional[DataSource] = None
101101
process_rule: Optional[ProcessRule] = None
102102
retrieval_model: Optional[RetrievalModel] = None
103103
doc_form: str = "text_model"

web/app/components/datasets/create/step-two/index.tsx

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1001,7 +1001,7 @@ const StepTwo = ({
10011001
)
10021002
: (
10031003
<div className='flex items-center mt-8 py-2'>
1004-
{!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
1004+
<Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
10051005
<Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
10061006
</div>
10071007
)}

web/app/components/datasets/documents/detail/completed/index.tsx

+6
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
44
import { useDebounceFn } from 'ahooks'
55
import { useTranslation } from 'react-i18next'
66
import { createContext, useContext, useContextSelector } from 'use-context-selector'
7+
import { usePathname } from 'next/navigation'
78
import { useDocumentContext } from '../index'
89
import { ProcessStatus } from '../segment-add'
910
import s from './style.module.css'
@@ -99,6 +100,7 @@ const Completed: FC<ICompletedProps> = ({
99100
}) => {
100101
const { t } = useTranslation()
101102
const { notify } = useContext(ToastContext)
103+
const pathname = usePathname()
102104
const datasetId = useDocumentContext(s => s.datasetId) || ''
103105
const documentId = useDocumentContext(s => s.documentId) || ''
104106
const docForm = useDocumentContext(s => s.docForm)
@@ -374,6 +376,10 @@ const Completed: FC<ICompletedProps> = ({
374376
// eslint-disable-next-line react-hooks/exhaustive-deps
375377
}, [segments, datasetId, documentId])
376378

379+
useEffect(() => {
380+
resetList()
381+
}, [pathname])
382+
377383
useEffect(() => {
378384
if (importStatus === ProcessStatus.COMPLETED)
379385
resetList()

0 commit comments

Comments
 (0)