|
# Matches any single character from the set: ] [ \ ( ) < > @ , : ; " .
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Matches a single backslash or double quote.
escapesre = re.compile(r'[\\"]')
|
45 | 45 |
|
| 46 | + |
46 | 47 | def _has_surrogates(s):
|
47 | 48 | """Return True if s may contain surrogate-escaped binary data."""
|
48 | 49 | # This check is based on the fact that unless there are surrogates, utf8
|
@@ -103,12 +104,127 @@ def formataddr(pair, charset='utf-8'):
|
103 | 104 | return address
|
104 | 105 |
|
105 | 106 |
|
| 107 | +def _iter_escaped_chars(addr): |
| 108 | + pos = 0 |
| 109 | + escape = False |
| 110 | + for pos, ch in enumerate(addr): |
| 111 | + if escape: |
| 112 | + yield (pos, '\\' + ch) |
| 113 | + escape = False |
| 114 | + elif ch == '\\': |
| 115 | + escape = True |
| 116 | + else: |
| 117 | + yield (pos, ch) |
| 118 | + if escape: |
| 119 | + yield (pos, '\\') |
| 120 | + |
| 121 | + |
| 122 | +def _strip_quoted_realnames(addr): |
| 123 | + """Strip real names between quotes.""" |
| 124 | + if '"' not in addr: |
| 125 | + # Fast path |
| 126 | + return addr |
| 127 | + |
| 128 | + start = 0 |
| 129 | + open_pos = None |
| 130 | + result = [] |
| 131 | + for pos, ch in _iter_escaped_chars(addr): |
| 132 | + if ch == '"': |
| 133 | + if open_pos is None: |
| 134 | + open_pos = pos |
| 135 | + else: |
| 136 | + if start != open_pos: |
| 137 | + result.append(addr[start:open_pos]) |
| 138 | + start = pos + 1 |
| 139 | + open_pos = None |
| 140 | + |
| 141 | + if start < len(addr): |
| 142 | + result.append(addr[start:]) |
| 143 | + |
| 144 | + return ''.join(result) |
106 | 145 |
|
107 |
| -def getaddresses(fieldvalues): |
108 |
| - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" |
109 |
| - all = COMMASPACE.join(str(v) for v in fieldvalues) |
110 |
| - a = _AddressList(all) |
111 |
| - return a.addresslist |
| 146 | + |
# Module-level flag.  Presumably lets callers detect that getaddresses() and
# parseaddr() accept the ``strict`` keyword -- TODO confirm against the docs.
supports_strict_parsing = True
| 148 | + |
def getaddresses(fieldvalues, *, strict=True):
    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.

    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
    its place.

    If strict is true, use a strict parser which rejects malformed inputs.
    """
    if not strict:
        # Legacy behaviour: join everything and return whatever was parsed.
        joined = COMMASPACE.join(str(value) for value in fieldvalues)
        return _AddressList(joined).addresslist

    values = _pre_parse_validation([str(value) for value in fieldvalues])
    parsed = _AddressList(COMMASPACE.join(values)).addresslist
    result = _post_parse_validation(parsed)

    # Each field is expected to contribute one address plus one more per
    # comma.  Commas inside a quoted real name are not delimiters, so quoted
    # sections are stripped before counting.
    expected = sum(
        1 + _strip_quoted_realnames(value).count(',') for value in values
    )

    # A mismatch means the parser split or merged addresses in a way the
    # input did not ask for; return a single empty pair rather than
    # misleading output.
    #
    # Malformed input: getaddresses(['alice@example.org <bob@example.com>'])
    # Invalid output: [('', 'alice@example.org'), ('', 'bob@example.com')]
    # Safe output: [('', '')]
    if len(result) != expected:
        return [('', '')]
    return result
| 191 | + |
| 192 | + |
def _check_parenthesis(addr):
    """Return True if the parentheses in addr are balanced.

    Parentheses inside double-quoted sections and backslash-escaped
    parentheses are ignored.  A ')' with no matching '(' before it makes the
    check fail immediately.
    """
    unquoted = _strip_quoted_realnames(addr)

    depth = 0
    for _, token in _iter_escaped_chars(unquoted):
        if token == ')':
            depth -= 1
            if depth < 0:
                # Closed more groups than were opened.
                return False
        elif token == '(':
            depth += 1
    return depth == 0
| 206 | + |
| 207 | + |
| 208 | +def _pre_parse_validation(email_header_fields): |
| 209 | + accepted_values = [] |
| 210 | + for v in email_header_fields: |
| 211 | + if not _check_parenthesis(v): |
| 212 | + v = "('', '')" |
| 213 | + accepted_values.append(v) |
| 214 | + |
| 215 | + return accepted_values |
| 216 | + |
| 217 | + |
| 218 | +def _post_parse_validation(parsed_email_header_tuples): |
| 219 | + accepted_values = [] |
| 220 | + # The parser would have parsed a correctly formatted domain-literal |
| 221 | + # The existence of an [ after parsing indicates a parsing failure |
| 222 | + for v in parsed_email_header_tuples: |
| 223 | + if '[' in v[1]: |
| 224 | + v = ('', '') |
| 225 | + accepted_values.append(v) |
| 226 | + |
| 227 | + return accepted_values |
112 | 228 |
|
113 | 229 |
|
114 | 230 | def _format_timetuple_and_zone(timetuple, zone):
|
@@ -207,16 +323,33 @@ def parsedate_to_datetime(data):
|
207 | 323 | tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
|
208 | 324 |
|
209 | 325 |
|
210 |
| -def parseaddr(addr): |
def parseaddr(addr, *, strict=True):
    """
    Parse addr into its constituent realname and email address parts.

    Return a tuple of realname and email address, unless the parse fails, in
    which case return a 2-tuple of ('', '').

    If strict is True, use a strict parser which rejects malformed inputs.
    In strict mode a list argument is reduced to its first element, and an
    empty list or any non-string value yields ('', '').
    """
    if not strict:
        addrs = _AddressList(addr).addresslist
        if not addrs:
            return ('', '')
        return addrs[0]

    if isinstance(addr, list):
        if not addr:
            # An empty list has no first element; report a failed parse
            # instead of letting addr[0] raise IndexError.
            return ('', '')
        addr = addr[0]

    if not isinstance(addr, str):
        return ('', '')

    addr = _pre_parse_validation([addr])[0]
    addrs = _post_parse_validation(_AddressList(addr).addresslist)

    # Exactly one address is acceptable; none or several means the input
    # was malformed.
    if len(addrs) != 1:
        return ('', '')

    return addrs[0]
|
221 | 354 |
|
222 | 355 |
|
|
0 commit comments