Skip to content

Commit 4cda348

Browse files
committed
Fix #294 (tested)
1 parent a862ff2 commit 4cda348

File tree

1 file changed

+22
-18
lines changed

1 file changed

+22
-18
lines changed

ingest.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,28 @@
2424
from constants import CHROMA_SETTINGS
2525

2626

27+
class MyElmLoader(UnstructuredEmailLoader):
    """Wrapper to fallback to text/plain when default does not work"""

    def load(self) -> List[Document]:
        """Load the email, retrying as text/plain when no HTML part is found.

        The parent loader is tried first. If it raises a ``ValueError`` whose
        message contains 'text/html content not found in email', the
        ``content_source`` entry of ``self.unstructured_kwargs`` is set to
        "text/plain" (the change stays on this instance) and the parent
        loader is called again.

        Returns:
            Whatever the parent ``load`` returns.

        Raises:
            Exception: any failure from the parent loader. When the exception
                class can be rebuilt from a single message string, an
                exception of the same type is raised with ``self.file_path``
                prefixed to the message and the original chained as its
                cause; otherwise the original exception is re-raised.
        """
        try:
            try:
                doc = super().load()
            except ValueError as e:
                if 'text/html content not found in email' not in str(e):
                    raise
                # Try plain text
                self.unstructured_kwargs["content_source"] = "text/plain"
                doc = super().load()
        except Exception as e:
            # Add file_path to exception message
            message = f"{self.file_path}: {e}"
            try:
                wrapped = type(e)(message)
            except Exception:
                # Some exception classes (e.g. UnicodeDecodeError) cannot be
                # constructed from a single string; do not let that failure
                # hide the real error.
                wrapped = None
            if wrapped is None:
                # add_note only exists on newer Python versions; use it when
                # available so the file path is still reported.
                if hasattr(e, "add_note"):
                    e.add_note(f"while loading {self.file_path}")
                raise
            raise wrapped from e

        return doc
47+
48+
2749
# Map file extensions to document loaders and their arguments
2850
LOADER_MAPPING = {
2951
".csv": (CSVLoader, {}),
@@ -47,24 +69,6 @@
4769
load_dotenv()
4870

4971

50-
class MyElmLoader(UnstructuredEmailLoader):
51-
"""Wrapper to fallback to text/plain when default does not work"""
52-
53-
def load(self) -> List[Document]:
54-
"""Wrapper adding fallback for elm without html"""
55-
try:
56-
doc = UnstructuredEmailLoader.load()
57-
except ValueError as e:
58-
if 'text/html content not found in email' in str(e):
59-
# Try plain text
60-
self.unstructured_kwargs["content_source"]="text/plain"
61-
doc = UnstructuredEmailLoader.load()
62-
else:
63-
raise
64-
65-
return doc
66-
67-
6872
def load_single_document(file_path: str) -> Document:
6973
ext = "." + file_path.rsplit(".", 1)[-1]
7074
if ext in LOADER_MAPPING:

0 commit comments

Comments
 (0)