File tree 1 file changed +10
-1
lines changed
1 file changed +10
-1
lines changed Original file line number Diff line number Diff line change 6
6
import pandas as pd
7
7
import pypdfium2
8
8
import yaml
9
+ from unstructured .partition .api import partition_via_api
9
10
from unstructured .partition .email import partition_email
10
11
from unstructured .partition .epub import partition_epub
11
12
from unstructured .partition .msg import partition_msg
12
13
from unstructured .partition .ppt import partition_ppt
13
14
from unstructured .partition .pptx import partition_pptx
14
15
16
+ from configs import dify_config
15
17
from core .file import File , FileTransferMethod , file_manager
16
18
from core .helper import ssrf_proxy
17
19
from core .variables import ArrayFileSegment
@@ -263,7 +265,14 @@ def _extract_text_from_ppt(file_content: bytes) -> str:
263
265
def _extract_text_from_pptx(file_content: bytes) -> str:
    """Extract the plain text of a PPTX document.

    When both ``dify_config.UNSTRUCTURED_API_URL`` and
    ``dify_config.UNSTRUCTURED_API_KEY`` are set, the document is partitioned
    through the Unstructured API; otherwise it is partitioned locally with
    ``partition_pptx``.

    Args:
        file_content: Raw bytes of the PPTX file.

    Returns:
        The ``text`` attribute of every partitioned element, joined by
        newlines. Elements without a ``text`` attribute contribute an empty
        string.

    Raises:
        TextExtractionError: If partitioning fails for any reason; the
            original exception is chained as the cause.
    """
    try:
        with io.BytesIO(file_content) as file:
            if dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY:
                elements = partition_via_api(
                    file=file,
                    # partition_via_api rejects a file object that comes
                    # without a filename. The content here is in-memory, so
                    # pass a placeholder; the .pptx extension presumably lets
                    # the service identify the document type.
                    metadata_filename="document.pptx",
                    api_url=dify_config.UNSTRUCTURED_API_URL,
                    api_key=dify_config.UNSTRUCTURED_API_KEY,
                )
            else:
                elements = partition_pptx(file=file)
            return "\n".join([getattr(element, "text", "") for element in elements])
    except Exception as e:
        raise TextExtractionError(f"Failed to extract text from PPTX: {str(e)}") from e
You can’t perform that action at this time.
0 commit comments