|
24 | 24 | from constants import CHROMA_SETTINGS
|
25 | 25 |
|
26 | 26 |
|
class MyElmLoader(UnstructuredEmailLoader):
    """Email (.eml) loader that retries with text/plain when HTML is missing."""

    def load(self) -> List[Document]:
        """Load the email, falling back to plain text when no HTML is found.

        Returns:
            The documents returned by ``UnstructuredEmailLoader.load``.

        Raises:
            Exception: Any failure from the underlying loader, re-raised with
                ``self.file_path`` prefixed to the message. The original
                exception type is kept when it can be rebuilt from a single
                message argument; otherwise a ``RuntimeError`` is raised.
                The original exception is always chained as ``__cause__``.
        """
        try:
            try:
                docs = super().load()
            except ValueError as err:
                if 'text/html content not found in email' not in str(err):
                    raise
                # No HTML content: switch content_source to text/plain, retry.
                self.unstructured_kwargs["content_source"] = "text/plain"
                docs = super().load()
        except Exception as err:
            # Prefix the file path so the failure can be traced to its file.
            message = f"{self.file_path}: {err}"
            try:
                wrapped = type(err)(message)
            except Exception:
                # Some exception classes (e.g. UnicodeDecodeError) cannot be
                # constructed from a single message; building them here
                # would raise TypeError and mask the real error.
                wrapped = RuntimeError(message)
            raise wrapped from err

        return docs
| 47 | + |
| 48 | + |
27 | 49 | # Map file extensions to document loaders and their arguments
|
28 | 50 | LOADER_MAPPING = {
|
29 | 51 | ".csv": (CSVLoader, {}),
|
|
47 | 69 | load_dotenv()
|
48 | 70 |
|
49 | 71 |
|
50 |
class MyElmLoader(UnstructuredEmailLoader):
    """Email (.eml) loader that retries with text/plain when HTML is missing."""

    def load(self) -> List[Document]:
        """Load the email, falling back to plain text when no HTML is found.

        Returns:
            The documents returned by ``UnstructuredEmailLoader.load``.

        Raises:
            ValueError: Re-raised unchanged when the parent loader fails for
                any reason other than missing text/html content.
        """
        try:
            # Call the parent through super() so the instance is passed;
            # calling UnstructuredEmailLoader.load() with no instance
            # raises TypeError before any loading happens.
            docs = super().load()
        except ValueError as err:
            if 'text/html content not found in email' not in str(err):
                raise
            # No HTML content: switch content_source to text/plain and retry.
            self.unstructured_kwargs["content_source"] = "text/plain"
            docs = super().load()

        return docs
66 |
| - |
67 |
| - |
68 | 72 | def load_single_document(file_path: str) -> Document:
|
69 | 73 | ext = "." + file_path.rsplit(".", 1)[-1]
|
70 | 74 | if ext in LOADER_MAPPING:
|
|
0 commit comments