Skip to content

feat: allow updating chunk settings for the existing documents #12833

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jan 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions api/services/dataset_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def save_document_with_dataset_id(
position = DocumentService.get_documents_position(dataset.id)
document_ids = []
duplicate_document_ids = []
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
for file_id in upload_file_list:
file = (
Expand Down Expand Up @@ -901,7 +901,7 @@ def save_document_with_dataset_id(
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
Expand All @@ -916,8 +916,8 @@ def save_document_with_dataset_id(
document_ids.append(document.id)
documents.append(document)
position += 1
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if not notion_info_list:
raise ValueError("No notion info list found.")
exist_page_ids = []
Expand Down Expand Up @@ -956,7 +956,7 @@ def save_document_with_dataset_id(
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
Expand All @@ -976,8 +976,8 @@ def save_document_with_dataset_id(
# delete not selected documents
if len(exist_document) > 0:
clean_notion_document_task.delay(list(exist_document.values()), dataset.id)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
website_info = knowledge_config.data_source.info_list.website_info_list
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if not website_info:
raise ValueError("No website info list found.")
urls = website_info.urls
Expand All @@ -996,7 +996,7 @@ def save_document_with_dataset_id(
document = DocumentService.build_document(
dataset,
dataset_process_rule.id, # type: ignore
knowledge_config.data_source.info_list.data_source_type,
knowledge_config.data_source.info_list.data_source_type, # type: ignore
knowledge_config.doc_form,
knowledge_config.doc_language,
data_source_info,
Expand Down Expand Up @@ -1195,20 +1195,20 @@ def save_document_without_dataset_id(tenant_id: str, knowledge_config: Knowledge

if features.billing.enabled:
count = 0
if knowledge_config.data_source.info_list.data_source_type == "upload_file":
if knowledge_config.data_source.info_list.data_source_type == "upload_file": # type: ignore
upload_file_list = (
knowledge_config.data_source.info_list.file_info_list.file_ids
if knowledge_config.data_source.info_list.file_info_list
knowledge_config.data_source.info_list.file_info_list.file_ids # type: ignore
if knowledge_config.data_source.info_list.file_info_list # type: ignore
else []
)
count = len(upload_file_list)
elif knowledge_config.data_source.info_list.data_source_type == "notion_import":
notion_info_list = knowledge_config.data_source.info_list.notion_info_list
elif knowledge_config.data_source.info_list.data_source_type == "notion_import": # type: ignore
notion_info_list = knowledge_config.data_source.info_list.notion_info_list # type: ignore
if notion_info_list:
for notion_info in notion_info_list:
count = count + len(notion_info.pages)
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl":
website_info = knowledge_config.data_source.info_list.website_info_list
elif knowledge_config.data_source.info_list.data_source_type == "website_crawl": # type: ignore
website_info = knowledge_config.data_source.info_list.website_info_list # type: ignore
if website_info:
count = len(website_info.urls)
batch_upload_limit = int(dify_config.BATCH_UPLOAD_LIMIT)
Expand Down Expand Up @@ -1239,7 +1239,7 @@ def save_document_without_dataset_id(tenant_id: str, knowledge_config: Knowledge
dataset = Dataset(
tenant_id=tenant_id,
name="",
data_source_type=knowledge_config.data_source.info_list.data_source_type,
data_source_type=knowledge_config.data_source.info_list.data_source_type, # type: ignore
indexing_technique=knowledge_config.indexing_technique,
created_by=account.id,
embedding_model=knowledge_config.embedding_model,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class KnowledgeConfig(BaseModel):
original_document_id: Optional[str] = None
duplicate: bool = True
indexing_technique: Literal["high_quality", "economy"]
data_source: DataSource
data_source: Optional[DataSource] = None
process_rule: Optional[ProcessRule] = None
retrieval_model: Optional[RetrievalModel] = None
doc_form: str = "text_model"
Expand Down
2 changes: 1 addition & 1 deletion web/app/components/datasets/create/step-two/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1001,7 +1001,7 @@ const StepTwo = ({
)
: (
<div className='flex items-center mt-8 py-2'>
{!datasetId && <Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>}
<Button loading={isCreating} variant='primary' onClick={createHandle}>{t('datasetCreation.stepTwo.save')}</Button>
<Button className='ml-2' onClick={onCancel}>{t('datasetCreation.stepTwo.cancel')}</Button>
</div>
)}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { useDebounceFn } from 'ahooks'
import { useTranslation } from 'react-i18next'
import { createContext, useContext, useContextSelector } from 'use-context-selector'
import { usePathname } from 'next/navigation'
import { useDocumentContext } from '../index'
import { ProcessStatus } from '../segment-add'
import s from './style.module.css'
Expand Down Expand Up @@ -99,6 +100,7 @@ const Completed: FC<ICompletedProps> = ({
}) => {
const { t } = useTranslation()
const { notify } = useContext(ToastContext)
const pathname = usePathname()
const datasetId = useDocumentContext(s => s.datasetId) || ''
const documentId = useDocumentContext(s => s.documentId) || ''
const docForm = useDocumentContext(s => s.docForm)
Expand Down Expand Up @@ -374,6 +376,10 @@ const Completed: FC<ICompletedProps> = ({
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [segments, datasetId, documentId])

useEffect(() => {
resetList()
}, [pathname])

useEffect(() => {
if (importStatus === ProcessStatus.COMPLETED)
resetList()
Expand Down
Loading