Skip to content

Commit f7be505

Browse files
ambvencukoubasbloemsaatserhiy-storchaka
authored
[3.9] gh-121650: Encode newlines in headers, and verify headers are sound (GH-122233) (#122610)
Per RFC 2047: > [...] these encoding schemes allow the > encoding of arbitrary octet values, mail readers that implement this > decoding should also ensure that display of the decoded data on the > recipient's terminal will not cause unwanted side-effects It seems that the "quoted-word" scheme is a valid way to include a newline character in a header value, just like we already allow undecodable bytes or control characters. They do need to be properly quoted when serialized to text, though. This should fail for custom fold() implementations that aren't careful about newlines. (cherry picked from commit 0976339) Co-authored-by: Petr Viktorin <[email protected]> Co-authored-by: Bas Bloemsaat <[email protected]> Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 3f5d9d1 commit f7be505

File tree

10 files changed

+162
-4
lines changed

10 files changed

+162
-4
lines changed

Doc/library/email.errors.rst

+6
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,12 @@ The following exception classes are defined in the :mod:`email.errors` module:
5959
:class:`~email.mime.image.MIMEImage`).
6060

6161

62+
.. exception:: HeaderWriteError()
63+
64+
Raised when an error occurs when the :mod:`~email.generator` outputs
65+
headers.
66+
67+
6268
Here is the list of the defects that the :class:`~email.parser.FeedParser`
6369
can find while parsing messages. Note that the defects are added to the message
6470
where the problem was found, so for example, if a message nested inside a

Doc/library/email.policy.rst

+18
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,24 @@ added matters. To illustrate::
229229

230230
.. versionadded:: 3.6
231231

232+
233+
.. attribute:: verify_generated_headers
234+
235+
If ``True`` (the default), the generator will raise
236+
:exc:`~email.errors.HeaderWriteError` instead of writing a header
237+
that is improperly folded or delimited, such that it would
238+
be parsed as multiple headers or joined with adjacent data.
239+
Such headers can be generated by custom header classes or bugs
240+
in the ``email`` module.
241+
242+
As it's a security feature, this defaults to ``True`` even in the
243+
:class:`~email.policy.Compat32` policy.
244+
For backwards compatible, but unsafe, behavior, it must be set to
245+
``False`` explicitly.
246+
247+
.. versionadded:: 3.9.20
248+
249+
232250
The following :class:`Policy` method is intended to be called by code using
233251
the email library to create policy instances with custom settings:
234252

Doc/whatsnew/3.9.rst

+12
Original file line numberDiff line numberDiff line change
@@ -1640,3 +1640,15 @@ ipaddress
16401640

16411641
* Fixed ``is_global`` and ``is_private`` behavior in ``IPv4Address``,
16421642
``IPv6Address``, ``IPv4Network`` and ``IPv6Network``.
1643+
1644+
email
1645+
-----
1646+
1647+
* Headers with embedded newlines are now quoted on output.
1648+
1649+
The :mod:`~email.generator` will now refuse to serialize (write) headers
1650+
that are improperly folded or delimited, such that they would be parsed as
1651+
multiple headers or joined with adjacent data.
1652+
If you need to turn this safety feature off,
1653+
set :attr:`~email.policy.Policy.verify_generated_headers`.
1654+
(Contributed by Bas Bloemsaat and Petr Viktorin in :gh:`121650`.)

Lib/email/_header_value_parser.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@
9292
ASPECIALS = TSPECIALS | set("*'%")
9393
ATTRIBUTE_ENDS = ASPECIALS | WSP
9494
EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%')
95+
NLSET = {'\n', '\r'}
96+
SPECIALSNL = SPECIALS | NLSET
9597

9698
def quote_string(value):
9799
return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"'
@@ -2778,9 +2780,13 @@ def _refold_parse_tree(parse_tree, *, policy):
27782780
wrap_as_ew_blocked -= 1
27792781
continue
27802782
tstr = str(part)
2781-
if part.token_type == 'ptext' and set(tstr) & SPECIALS:
2782-
# Encode if tstr contains special characters.
2783-
want_encoding = True
2783+
if not want_encoding:
2784+
if part.token_type == 'ptext':
2785+
# Encode if tstr contains special characters.
2786+
want_encoding = not SPECIALSNL.isdisjoint(tstr)
2787+
else:
2788+
# Encode if tstr contains newlines.
2789+
want_encoding = not NLSET.isdisjoint(tstr)
27842790
try:
27852791
tstr.encode(encoding)
27862792
charset = encoding

Lib/email/_policybase.py

+8
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,13 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
157157
message_factory -- the class to use to create new message objects.
158158
If the value is None, the default is Message.
159159
160+
verify_generated_headers
161+
-- if true, the generator verifies that each header
162+
they are properly folded, so that a parser won't
163+
treat it as multiple headers, start-of-body, or
164+
part of another header.
165+
This is a check against custom Header & fold()
166+
implementations.
160167
"""
161168

162169
raise_on_defect = False
@@ -165,6 +172,7 @@ class Policy(_PolicyBase, metaclass=abc.ABCMeta):
165172
max_line_length = 78
166173
mangle_from_ = False
167174
message_factory = None
175+
verify_generated_headers = True
168176

169177
def handle_defect(self, obj, defect):
170178
"""Based on policy, either raise defect or call register_defect.

Lib/email/errors.py

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@ class CharsetError(MessageError):
2929
"""An illegal charset was given."""
3030

3131

32+
class HeaderWriteError(MessageError):
33+
"""Error while writing headers."""
34+
35+
3236
# These are parsing defects which the parser was able to work around.
3337
class MessageDefect(ValueError):
3438
"""Base class for a message defect."""

Lib/email/generator.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,14 @@
1414
from copy import deepcopy
1515
from io import StringIO, BytesIO
1616
from email.utils import _has_surrogates
17+
from email.errors import HeaderWriteError
1718

1819
UNDERSCORE = '_'
1920
NL = '\n' # XXX: no longer used by the code below.
2021

2122
NLCRE = re.compile(r'\r\n|\r|\n')
2223
fcre = re.compile(r'^From ', re.MULTILINE)
24+
NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
2325

2426

2527

@@ -223,7 +225,16 @@ def _dispatch(self, msg):
223225

224226
def _write_headers(self, msg):
225227
for h, v in msg.raw_items():
226-
self.write(self.policy.fold(h, v))
228+
folded = self.policy.fold(h, v)
229+
if self.policy.verify_generated_headers:
230+
linesep = self.policy.linesep
231+
if not folded.endswith(self.policy.linesep):
232+
raise HeaderWriteError(
233+
f'folded header does not end with {linesep!r}: {folded!r}')
234+
if NEWLINE_WITHOUT_FWSP.search(folded.removesuffix(linesep)):
235+
raise HeaderWriteError(
236+
f'folded header contains newline: {folded!r}')
237+
self.write(folded)
227238
# A blank line always separates headers from body
228239
self.write(self._NL)
229240

Lib/test/test_email/test_generator.py

+62
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from email.generator import Generator, BytesGenerator
77
from email.headerregistry import Address
88
from email import policy
9+
import email.errors
910
from test.test_email import TestEmailBase, parameterize
1011

1112

@@ -216,6 +217,44 @@ def test_rfc2231_wrapping_switches_to_default_len_if_too_narrow(self):
216217
g.flatten(msg)
217218
self.assertEqual(s.getvalue(), self.typ(expected))
218219

220+
def test_keep_encoded_newlines(self):
221+
msg = self.msgmaker(self.typ(textwrap.dedent("""\
222+
To: nobody
223+
Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: [email protected]
224+
225+
None
226+
""")))
227+
expected = textwrap.dedent("""\
228+
To: nobody
229+
Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: [email protected]
230+
231+
None
232+
""")
233+
s = self.ioclass()
234+
g = self.genclass(s, policy=self.policy.clone(max_line_length=80))
235+
g.flatten(msg)
236+
self.assertEqual(s.getvalue(), self.typ(expected))
237+
238+
def test_keep_long_encoded_newlines(self):
239+
msg = self.msgmaker(self.typ(textwrap.dedent("""\
240+
To: nobody
241+
Subject: Bad subject=?UTF-8?Q?=0A?=Bcc: [email protected]
242+
243+
None
244+
""")))
245+
expected = textwrap.dedent("""\
246+
To: nobody
247+
Subject: Bad subject
248+
=?utf-8?q?=0A?=Bcc:
249+
250+
251+
None
252+
""")
253+
s = self.ioclass()
254+
g = self.genclass(s, policy=self.policy.clone(max_line_length=30))
255+
g.flatten(msg)
256+
self.assertEqual(s.getvalue(), self.typ(expected))
257+
219258

220259
class TestGenerator(TestGeneratorBase, TestEmailBase):
221260

@@ -224,6 +263,29 @@ class TestGenerator(TestGeneratorBase, TestEmailBase):
224263
ioclass = io.StringIO
225264
typ = str
226265

266+
def test_verify_generated_headers(self):
267+
"""gh-121650: by default the generator prevents header injection"""
268+
class LiteralHeader(str):
269+
name = 'Header'
270+
def fold(self, **kwargs):
271+
return self
272+
273+
for text in (
274+
'Value\r\nBad Injection\r\n',
275+
'NoNewLine'
276+
):
277+
with self.subTest(text=text):
278+
message = message_from_string(
279+
"Header: Value\r\n\r\nBody",
280+
policy=self.policy,
281+
)
282+
283+
del message['Header']
284+
message['Header'] = LiteralHeader(text)
285+
286+
with self.assertRaises(email.errors.HeaderWriteError):
287+
message.as_string()
288+
227289

228290
class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
229291

Lib/test/test_email/test_policy.py

+26
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ class PolicyAPITests(unittest.TestCase):
2626
'raise_on_defect': False,
2727
'mangle_from_': True,
2828
'message_factory': None,
29+
'verify_generated_headers': True,
2930
}
3031
# These default values are the ones set on email.policy.default.
3132
# If any of these defaults change, the docs must be updated.
@@ -277,6 +278,31 @@ def test_short_maxlen_error(self):
277278
with self.assertRaises(email.errors.HeaderParseError):
278279
policy.fold("Subject", subject)
279280

281+
def test_verify_generated_headers(self):
282+
"""Turning protection off allows header injection"""
283+
policy = email.policy.default.clone(verify_generated_headers=False)
284+
for text in (
285+
'Header: Value\r\nBad: Injection\r\n',
286+
'Header: NoNewLine'
287+
):
288+
with self.subTest(text=text):
289+
message = email.message_from_string(
290+
"Header: Value\r\n\r\nBody",
291+
policy=policy,
292+
)
293+
class LiteralHeader(str):
294+
name = 'Header'
295+
def fold(self, **kwargs):
296+
return self
297+
298+
del message['Header']
299+
message['Header'] = LiteralHeader(text)
300+
301+
self.assertEqual(
302+
message.as_string(),
303+
f"{text}\nBody",
304+
)
305+
280306
# XXX: Need subclassing tests.
281307
# For adding subclassed objects, make sure the usual rules apply (subclass
282308
# wins), but that the order still works (right overrides left).
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
:mod:`email` headers with embedded newlines are now quoted on output. The
2+
:mod:`~email.generator` will now refuse to serialize (write) headers that
3+
are unsafely folded or delimited; see
4+
:attr:`~email.policy.Policy.verify_generated_headers`. (Contributed by Bas
5+
Bloemsaat and Petr Viktorin in :gh:`121650`.)

0 commit comments

Comments
 (0)