Skip to content

Commit 68e8496

Browse files
authored
[PR #8608/c4acabc backport][3.10] Fix timer handle churn in websocket heartbeat (#8639)
Co-authored-by: Sam Bull <[email protected]> (cherry picked from commit c4acabc)
1 parent 72f41aa commit 68e8496

7 files changed

+331
-98
lines changed

CHANGES/8608.misc.rst

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Improved websocket performance when messages are sent or received frequently -- by :user:`bdraco`.
2+
3+
The WebSocket heartbeat scheduling algorithm was improved to reduce the ``asyncio`` scheduling overhead by decreasing the number of ``asyncio.TimerHandle`` creations and cancellations.

aiohttp/client_ws.py

+72-43
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from .client_exceptions import ClientError, ServerTimeoutError
88
from .client_reqrep import ClientResponse
9-
from .helpers import call_later, set_result
9+
from .helpers import calculate_timeout_when, set_result
1010
from .http import (
1111
WS_CLOSED_MESSAGE,
1212
WS_CLOSING_MESSAGE,
@@ -62,6 +62,7 @@ def __init__(
6262
self._autoping = autoping
6363
self._heartbeat = heartbeat
6464
self._heartbeat_cb: Optional[asyncio.TimerHandle] = None
65+
self._heartbeat_when: float = 0.0
6566
if heartbeat is not None:
6667
self._pong_heartbeat = heartbeat / 2.0
6768
self._pong_response_cb: Optional[asyncio.TimerHandle] = None
@@ -75,52 +76,64 @@ def __init__(
7576
self._reset_heartbeat()
7677

7778
def _cancel_heartbeat(self) -> None:
78-
if self._pong_response_cb is not None:
79-
self._pong_response_cb.cancel()
80-
self._pong_response_cb = None
81-
79+
self._cancel_pong_response_cb()
8280
if self._heartbeat_cb is not None:
8381
self._heartbeat_cb.cancel()
8482
self._heartbeat_cb = None
8583

86-
def _reset_heartbeat(self) -> None:
87-
self._cancel_heartbeat()
84+
def _cancel_pong_response_cb(self) -> None:
85+
if self._pong_response_cb is not None:
86+
self._pong_response_cb.cancel()
87+
self._pong_response_cb = None
8888

89-
if self._heartbeat is not None:
90-
self._heartbeat_cb = call_later(
91-
self._send_heartbeat,
92-
self._heartbeat,
93-
self._loop,
94-
timeout_ceil_threshold=(
95-
self._conn._connector._timeout_ceil_threshold
96-
if self._conn is not None
97-
else 5
98-
),
99-
)
89+
def _reset_heartbeat(self) -> None:
90+
if self._heartbeat is None:
91+
return
92+
self._cancel_pong_response_cb()
93+
loop = self._loop
94+
assert loop is not None
95+
conn = self._conn
96+
timeout_ceil_threshold = (
97+
conn._connector._timeout_ceil_threshold if conn is not None else 5
98+
)
99+
now = loop.time()
100+
when = calculate_timeout_when(now, self._heartbeat, timeout_ceil_threshold)
101+
self._heartbeat_when = when
102+
if self._heartbeat_cb is None:
103+
# We do not cancel the previous heartbeat_cb here because
104+
# it generates a significant amount of TimerHandle churn
105+
# which causes asyncio to rebuild the heap frequently.
106+
# Instead _send_heartbeat() will reschedule the next
107+
# heartbeat if it fires too early.
108+
self._heartbeat_cb = loop.call_at(when, self._send_heartbeat)
100109

101110
def _send_heartbeat(self) -> None:
102-
if self._heartbeat is not None and not self._closed:
103-
# fire-and-forget a task is not perfect but maybe ok for
104-
# sending ping. Otherwise we need a long-living heartbeat
105-
# task in the class.
106-
self._loop.create_task(self._writer.ping())
107-
108-
if self._pong_response_cb is not None:
109-
self._pong_response_cb.cancel()
110-
self._pong_response_cb = call_later(
111-
self._pong_not_received,
112-
self._pong_heartbeat,
113-
self._loop,
114-
timeout_ceil_threshold=(
115-
self._conn._connector._timeout_ceil_threshold
116-
if self._conn is not None
117-
else 5
118-
),
111+
self._heartbeat_cb = None
112+
loop = self._loop
113+
now = loop.time()
114+
if now < self._heartbeat_when:
115+
# Heartbeat fired too early, reschedule
116+
self._heartbeat_cb = loop.call_at(
117+
self._heartbeat_when, self._send_heartbeat
119118
)
119+
return
120+
121+
# fire-and-forget a task is not perfect but maybe ok for
122+
# sending ping. Otherwise we need a long-living heartbeat
123+
# task in the class.
124+
loop.create_task(self._writer.ping()) # type: ignore[unused-awaitable]
125+
126+
conn = self._conn
127+
timeout_ceil_threshold = (
128+
conn._connector._timeout_ceil_threshold if conn is not None else 5
129+
)
130+
when = calculate_timeout_when(now, self._pong_heartbeat, timeout_ceil_threshold)
131+
self._cancel_pong_response_cb()
132+
self._pong_response_cb = loop.call_at(when, self._pong_not_received)
120133

121134
def _pong_not_received(self) -> None:
122135
if not self._closed:
123-
self._closed = True
136+
self._set_closed()
124137
self._close_code = WSCloseCode.ABNORMAL_CLOSURE
125138
self._exception = ServerTimeoutError()
126139
self._response.close()
@@ -129,6 +142,22 @@ def _pong_not_received(self) -> None:
129142
WSMessage(WSMsgType.ERROR, self._exception, None)
130143
)
131144

145+
def _set_closed(self) -> None:
146+
"""Set the connection to closed.
147+
148+
Cancel any heartbeat timers and set the closed flag.
149+
"""
150+
self._closed = True
151+
self._cancel_heartbeat()
152+
153+
def _set_closing(self) -> None:
154+
"""Set the connection to closing.
155+
156+
Cancel any heartbeat timers and set the closing flag.
157+
"""
158+
self._closing = True
159+
self._cancel_heartbeat()
160+
132161
@property
133162
def closed(self) -> bool:
134163
return self._closed
@@ -193,13 +222,12 @@ async def close(self, *, code: int = WSCloseCode.OK, message: bytes = b"") -> bo
193222
if self._waiting and not self._closing:
194223
assert self._loop is not None
195224
self._close_wait = self._loop.create_future()
196-
self._closing = True
225+
self._set_closing()
197226
self._reader.feed_data(WS_CLOSING_MESSAGE, 0)
198227
await self._close_wait
199228

200229
if not self._closed:
201-
self._cancel_heartbeat()
202-
self._closed = True
230+
self._set_closed()
203231
try:
204232
await self._writer.close(code, message)
205233
except asyncio.CancelledError:
@@ -266,7 +294,8 @@ async def receive(self, timeout: Optional[float] = None) -> WSMessage:
266294
await self.close()
267295
return WSMessage(WSMsgType.CLOSED, None, None)
268296
except ClientError:
269-
self._closed = True
297+
# Likely ServerDisconnectedError when connection is lost
298+
self._set_closed()
270299
self._close_code = WSCloseCode.ABNORMAL_CLOSURE
271300
return WS_CLOSED_MESSAGE
272301
except WebSocketError as exc:
@@ -275,18 +304,18 @@ async def receive(self, timeout: Optional[float] = None) -> WSMessage:
275304
return WSMessage(WSMsgType.ERROR, exc, None)
276305
except Exception as exc:
277306
self._exception = exc
278-
self._closing = True
307+
self._set_closing()
279308
self._close_code = WSCloseCode.ABNORMAL_CLOSURE
280309
await self.close()
281310
return WSMessage(WSMsgType.ERROR, exc, None)
282311

283312
if msg.type is WSMsgType.CLOSE:
284-
self._closing = True
313+
self._set_closing()
285314
self._close_code = msg.data
286315
if not self._closed and self._autoclose:
287316
await self.close()
288317
elif msg.type is WSMsgType.CLOSING:
289-
self._closing = True
318+
self._set_closing()
290319
elif msg.type is WSMsgType.PING and self._autoping:
291320
await self.pong(msg.data)
292321
continue

aiohttp/helpers.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -586,12 +586,23 @@ def call_later(
586586
loop: asyncio.AbstractEventLoop,
587587
timeout_ceil_threshold: float = 5,
588588
) -> Optional[asyncio.TimerHandle]:
589-
if timeout is not None and timeout > 0:
590-
when = loop.time() + timeout
591-
if timeout > timeout_ceil_threshold:
592-
when = ceil(when)
593-
return loop.call_at(when, cb)
594-
return None
589+
if timeout is None or timeout <= 0:
590+
return None
591+
now = loop.time()
592+
when = calculate_timeout_when(now, timeout, timeout_ceil_threshold)
593+
return loop.call_at(when, cb)
594+
595+
596+
def calculate_timeout_when(
597+
loop_time: float,
598+
timeout: float,
599+
timeout_ceiling_threshold: float,
600+
) -> float:
601+
"""Calculate when to execute a timeout."""
602+
when = loop_time + timeout
603+
if timeout > timeout_ceiling_threshold:
604+
return ceil(when)
605+
return when
595606

596607

597608
class TimeoutHandle:

aiohttp/web_ws.py

+61-39
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from . import hdrs
1313
from .abc import AbstractStreamWriter
14-
from .helpers import call_later, set_exception, set_result
14+
from .helpers import calculate_timeout_when, set_exception, set_result
1515
from .http import (
1616
WS_CLOSED_MESSAGE,
1717
WS_CLOSING_MESSAGE,
@@ -89,6 +89,7 @@ def __init__(
8989
self._autoclose = autoclose
9090
self._autoping = autoping
9191
self._heartbeat = heartbeat
92+
self._heartbeat_when = 0.0
9293
self._heartbeat_cb: Optional[asyncio.TimerHandle] = None
9394
if heartbeat is not None:
9495
self._pong_heartbeat = heartbeat / 2.0
@@ -97,57 +98,76 @@ def __init__(
9798
self._max_msg_size = max_msg_size
9899

99100
def _cancel_heartbeat(self) -> None:
100-
if self._pong_response_cb is not None:
101-
self._pong_response_cb.cancel()
102-
self._pong_response_cb = None
103-
101+
self._cancel_pong_response_cb()
104102
if self._heartbeat_cb is not None:
105103
self._heartbeat_cb.cancel()
106104
self._heartbeat_cb = None
107105

108-
def _reset_heartbeat(self) -> None:
109-
self._cancel_heartbeat()
106+
def _cancel_pong_response_cb(self) -> None:
107+
if self._pong_response_cb is not None:
108+
self._pong_response_cb.cancel()
109+
self._pong_response_cb = None
110110

111-
if self._heartbeat is not None:
112-
assert self._loop is not None
113-
self._heartbeat_cb = call_later(
114-
self._send_heartbeat,
115-
self._heartbeat,
116-
self._loop,
117-
timeout_ceil_threshold=(
118-
self._req._protocol._timeout_ceil_threshold
119-
if self._req is not None
120-
else 5
121-
),
122-
)
111+
def _reset_heartbeat(self) -> None:
112+
if self._heartbeat is None:
113+
return
114+
self._cancel_pong_response_cb()
115+
req = self._req
116+
timeout_ceil_threshold = (
117+
req._protocol._timeout_ceil_threshold if req is not None else 5
118+
)
119+
loop = self._loop
120+
assert loop is not None
121+
now = loop.time()
122+
when = calculate_timeout_when(now, self._heartbeat, timeout_ceil_threshold)
123+
self._heartbeat_when = when
124+
if self._heartbeat_cb is None:
125+
# We do not cancel the previous heartbeat_cb here because
126+
# it generates a significant amount of TimerHandle churn
127+
# which causes asyncio to rebuild the heap frequently.
128+
# Instead _send_heartbeat() will reschedule the next
129+
# heartbeat if it fires too early.
130+
self._heartbeat_cb = loop.call_at(when, self._send_heartbeat)
123131

124132
def _send_heartbeat(self) -> None:
125-
if self._heartbeat is not None and not self._closed:
126-
assert self._loop is not None
127-
# fire-and-forget a task is not perfect but maybe ok for
128-
# sending ping. Otherwise we need a long-living heartbeat
129-
# task in the class.
130-
self._loop.create_task(self._writer.ping()) # type: ignore[union-attr]
131-
132-
if self._pong_response_cb is not None:
133-
self._pong_response_cb.cancel()
134-
self._pong_response_cb = call_later(
135-
self._pong_not_received,
136-
self._pong_heartbeat,
137-
self._loop,
138-
timeout_ceil_threshold=(
139-
self._req._protocol._timeout_ceil_threshold
140-
if self._req is not None
141-
else 5
142-
),
133+
self._heartbeat_cb = None
134+
loop = self._loop
135+
assert loop is not None and self._writer is not None
136+
now = loop.time()
137+
if now < self._heartbeat_when:
138+
# Heartbeat fired too early, reschedule
139+
self._heartbeat_cb = loop.call_at(
140+
self._heartbeat_when, self._send_heartbeat
143141
)
142+
return
143+
144+
# fire-and-forget a task is not perfect but maybe ok for
145+
# sending ping. Otherwise we need a long-living heartbeat
146+
# task in the class.
147+
loop.create_task(self._writer.ping()) # type: ignore[unused-awaitable]
148+
149+
req = self._req
150+
timeout_ceil_threshold = (
151+
req._protocol._timeout_ceil_threshold if req is not None else 5
152+
)
153+
when = calculate_timeout_when(now, self._pong_heartbeat, timeout_ceil_threshold)
154+
self._cancel_pong_response_cb()
155+
self._pong_response_cb = loop.call_at(when, self._pong_not_received)
144156

145157
def _pong_not_received(self) -> None:
146158
if self._req is not None and self._req.transport is not None:
147-
self._closed = True
159+
self._set_closed()
148160
self._set_code_close_transport(WSCloseCode.ABNORMAL_CLOSURE)
149161
self._exception = asyncio.TimeoutError()
150162

163+
def _set_closed(self) -> None:
164+
"""Set the connection to closed.
165+
166+
Cancel any heartbeat timers and set the closed flag.
167+
"""
168+
self._closed = True
169+
self._cancel_heartbeat()
170+
151171
async def prepare(self, request: BaseRequest) -> AbstractStreamWriter:
152172
# make pre-check to don't hide it by do_handshake() exceptions
153173
if self._payload_writer is not None:
@@ -387,7 +407,7 @@ async def close(
387407
if self._closed:
388408
return False
389409

390-
self._closed = True
410+
self._set_closed()
391411
try:
392412
await self._writer.close(code, message)
393413
writer = self._payload_writer
@@ -431,6 +451,7 @@ def _set_closing(self, code: WSCloseCode) -> None:
431451
"""Set the close code and mark the connection as closing."""
432452
self._closing = True
433453
self._close_code = code
454+
self._cancel_heartbeat()
434455

435456
def _set_code_close_transport(self, code: WSCloseCode) -> None:
436457
"""Set the close code and close the transport."""
@@ -543,5 +564,6 @@ def _cancel(self, exc: BaseException) -> None:
543564
# web_protocol calls this from connection_lost
544565
# or when the server is shutting down.
545566
self._closing = True
567+
self._cancel_heartbeat()
546568
if self._reader is not None:
547569
set_exception(self._reader, exc)

0 commit comments

Comments
 (0)