Skip to content

Commit a862ff2

Browse files
committed
Add fallback for plain elm #294 #290
1 parent ad64589 commit a862ff2

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

ingest.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
".doc": (UnstructuredWordDocumentLoader, {}),
3232
".docx": (UnstructuredWordDocumentLoader, {}),
3333
".enex": (EverNoteLoader, {}),
34-
".eml": (UnstructuredEmailLoader, {}),
34+
".eml": (MyElmLoader, {}),
3535
".epub": (UnstructuredEPubLoader, {}),
3636
".html": (UnstructuredHTMLLoader, {}),
3737
".md": (UnstructuredMarkdownLoader, {}),
@@ -47,6 +47,24 @@
4747
load_dotenv()
4848

4949

50+
class MyElmLoader(UnstructuredEmailLoader):
    """Email (.eml) loader that falls back to the text/plain part.

    Behaves like ``UnstructuredEmailLoader``, except that when loading fails
    because the message has no text/html content, the load is retried with
    ``content_source`` set to ``"text/plain"``.
    """

    def load(self) -> List[Document]:
        """Load the email, retrying as plain text if no HTML part exists.

        Returns:
            The documents produced by the parent loader's ``load``.

        Raises:
            ValueError: re-raised unchanged when the parent loader fails for
                any reason other than missing text/html content.
        """
        try:
            # Use super() so the call is bound to this instance; calling
            # UnstructuredEmailLoader.load() without `self` raises TypeError.
            return super().load()
        except ValueError as e:
            if "text/html content not found in email" not in str(e):
                raise
            # The message has no HTML part: retry using the plain-text part.
            # The setting is stored on the instance, so it stays in effect
            # for any later load() call on this loader.
            self.unstructured_kwargs["content_source"] = "text/plain"
            return super().load()
66+
67+
5068
def load_single_document(file_path: str) -> Document:
5169
ext = "." + file_path.rsplit(".", 1)[-1]
5270
if ext in LOADER_MAPPING:

0 commit comments

Comments
 (0)