File tree 9 files changed +17
-15
lines changed
9 files changed +17
-15
lines changed Original file line number Diff line number Diff line change @@ -601,7 +601,7 @@ class RagEtlConfig(BaseSettings):
601
601
602
602
UNSTRUCTURED_API_KEY : Optional [str ] = Field (
603
603
description = "API key for Unstructured.io service" ,
604
- default = None ,
604
+ default = "" ,
605
605
)
606
606
607
607
SCARF_NO_ANALYTICS : Optional [str ] = Field (
Original file line number Diff line number Diff line change @@ -102,12 +102,11 @@ def extract(
102
102
input_file = Path (file_path )
103
103
file_extension = input_file .suffix .lower ()
104
104
etl_type = dify_config .ETL_TYPE
105
- unstructured_api_url = dify_config .UNSTRUCTURED_API_URL
106
- unstructured_api_key = dify_config .UNSTRUCTURED_API_KEY
107
- assert unstructured_api_url is not None , "unstructured_api_url is required"
108
- assert unstructured_api_key is not None , "unstructured_api_key is required"
109
105
extractor : Optional [BaseExtractor ] = None
110
106
if etl_type == "Unstructured" :
107
+ unstructured_api_url = dify_config .UNSTRUCTURED_API_URL
108
+ unstructured_api_key = dify_config .UNSTRUCTURED_API_KEY or ""
109
+
111
110
if file_extension in {".xlsx" , ".xls" }:
112
111
extractor = ExcelExtractor (file_path )
113
112
elif file_extension == ".pdf" :
Original file line number Diff line number Diff line change 1
1
import base64
2
2
import logging
3
+ from typing import Optional
3
4
4
5
from bs4 import BeautifulSoup # type: ignore
5
6
@@ -15,7 +16,7 @@ class UnstructuredEmailExtractor(BaseExtractor):
15
16
file_path: Path to the file to load.
16
17
"""
17
18
18
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
19
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
19
20
"""Initialize with file path."""
20
21
self ._file_path = file_path
21
22
self ._api_url = api_url
Original file line number Diff line number Diff line change @@ -19,7 +19,7 @@ def __init__(
19
19
self ,
20
20
file_path : str ,
21
21
api_url : Optional [str ] = None ,
22
- api_key : Optional [ str ] = None ,
22
+ api_key : str = "" ,
23
23
):
24
24
"""Initialize with file path."""
25
25
self ._file_path = file_path
@@ -30,9 +30,6 @@ def extract(self) -> list[Document]:
30
30
if self ._api_url :
31
31
from unstructured .partition .api import partition_via_api
32
32
33
- if self ._api_key is None :
34
- raise ValueError ("api_key is required" )
35
-
36
33
elements = partition_via_api (filename = self ._file_path , api_url = self ._api_url , api_key = self ._api_key )
37
34
else :
38
35
from unstructured .partition .epub import partition_epub
Original file line number Diff line number Diff line change 1
1
import logging
2
+ from typing import Optional
2
3
3
4
from core .rag .extractor .extractor_base import BaseExtractor
4
5
from core .rag .models .document import Document
@@ -24,7 +25,7 @@ class UnstructuredMarkdownExtractor(BaseExtractor):
24
25
if the specified encoding fails.
25
26
"""
26
27
27
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
28
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
28
29
"""Initialize with file path."""
29
30
self ._file_path = file_path
30
31
self ._api_url = api_url
Original file line number Diff line number Diff line change 1
1
import logging
2
+ from typing import Optional
2
3
3
4
from core .rag .extractor .extractor_base import BaseExtractor
4
5
from core .rag .models .document import Document
@@ -14,7 +15,7 @@ class UnstructuredMsgExtractor(BaseExtractor):
14
15
file_path: Path to the file to load.
15
16
"""
16
17
17
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
18
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
18
19
"""Initialize with file path."""
19
20
self ._file_path = file_path
20
21
self ._api_url = api_url
Original file line number Diff line number Diff line change 1
1
import logging
2
+ from typing import Optional
2
3
3
4
from core .rag .extractor .extractor_base import BaseExtractor
4
5
from core .rag .models .document import Document
@@ -14,7 +15,7 @@ class UnstructuredPPTExtractor(BaseExtractor):
14
15
file_path: Path to the file to load.
15
16
"""
16
17
17
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
18
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
18
19
"""Initialize with file path."""
19
20
self ._file_path = file_path
20
21
self ._api_url = api_url
Original file line number Diff line number Diff line change 1
1
import logging
2
+ from typing import Optional
2
3
3
4
from core .rag .extractor .extractor_base import BaseExtractor
4
5
from core .rag .models .document import Document
@@ -14,7 +15,7 @@ class UnstructuredPPTXExtractor(BaseExtractor):
14
15
file_path: Path to the file to load.
15
16
"""
16
17
17
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
18
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
18
19
"""Initialize with file path."""
19
20
self ._file_path = file_path
20
21
self ._api_url = api_url
Original file line number Diff line number Diff line change 1
1
import logging
2
+ from typing import Optional
2
3
3
4
from core .rag .extractor .extractor_base import BaseExtractor
4
5
from core .rag .models .document import Document
@@ -14,7 +15,7 @@ class UnstructuredXmlExtractor(BaseExtractor):
14
15
file_path: Path to the file to load.
15
16
"""
16
17
17
- def __init__ (self , file_path : str , api_url : str , api_key : str ):
18
+ def __init__ (self , file_path : str , api_url : Optional [ str ] = None , api_key : str = "" ):
18
19
"""Initialize with file path."""
19
20
self ._file_path = file_path
20
21
self ._api_url = api_url
You can’t perform that action at this time.
0 commit comments