Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit fcf3c70

Browse files
authored
Ensure that we do not cache empty sync responses after a timeout (#10158)
Fixes #8518 by telling the ResponseCache not to cache the /sync response if the next_batch param is the same as the since token.
1 parent 9cf6e0e commit fcf3c70

File tree

8 files changed

+84
-21
lines changed

8 files changed

+84
-21
lines changed

changelog.d/10157.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug introduced in v1.21.0 which could cause `/sync` to return immediately with an empty response.

changelog.d/10157.misc

Lines changed: 0 additions & 1 deletion
This file was deleted.

changelog.d/10158.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug introduced in v1.21.0 which could cause `/sync` to return immediately with an empty response.

synapse/handlers/sync.py

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
from synapse.util.async_helpers import concurrently_execute
5050
from synapse.util.caches.expiringcache import ExpiringCache
5151
from synapse.util.caches.lrucache import LruCache
52-
from synapse.util.caches.response_cache import ResponseCache
52+
from synapse.util.caches.response_cache import ResponseCache, ResponseCacheContext
5353
from synapse.util.metrics import Measure, measure_func
5454
from synapse.visibility import filter_events_for_client
5555

@@ -83,12 +83,15 @@
8383
LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100
8484

8585

86+
SyncRequestKey = Tuple[Any, ...]
87+
88+
8689
@attr.s(slots=True, frozen=True)
8790
class SyncConfig:
8891
user = attr.ib(type=UserID)
8992
filter_collection = attr.ib(type=FilterCollection)
9093
is_guest = attr.ib(type=bool)
91-
request_key = attr.ib(type=Tuple[Any, ...])
94+
request_key = attr.ib(type=SyncRequestKey)
9295
device_id = attr.ib(type=Optional[str])
9396

9497

@@ -266,9 +269,9 @@ def __init__(self, hs: "HomeServer"):
266269
self.presence_handler = hs.get_presence_handler()
267270
self.event_sources = hs.get_event_sources()
268271
self.clock = hs.get_clock()
269-
self.response_cache = ResponseCache(
272+
self.response_cache: ResponseCache[SyncRequestKey] = ResponseCache(
270273
hs.get_clock(), "sync"
271-
) # type: ResponseCache[Tuple[Any, ...]]
274+
)
272275
self.state = hs.get_state_handler()
273276
self.auth = hs.get_auth()
274277
self.storage = hs.get_storage()
@@ -307,16 +310,18 @@ async def wait_for_sync_for_user(
307310
since_token,
308311
timeout,
309312
full_state,
313+
cache_context=True,
310314
)
311315
logger.debug("Returning sync response for %s", user_id)
312316
return res
313317

314318
async def _wait_for_sync_for_user(
315319
self,
316320
sync_config: SyncConfig,
317-
since_token: Optional[StreamToken] = None,
318-
timeout: int = 0,
319-
full_state: bool = False,
321+
since_token: Optional[StreamToken],
322+
timeout: int,
323+
full_state: bool,
324+
cache_context: ResponseCacheContext[SyncRequestKey],
320325
) -> SyncResult:
321326
if since_token is None:
322327
sync_type = "initial_sync"
@@ -343,13 +348,13 @@ async def _wait_for_sync_for_user(
343348
if timeout == 0 or since_token is None or full_state:
344349
# we are going to return immediately, so don't bother calling
345350
# notifier.wait_for_events.
346-
result = await self.current_sync_for_user(
351+
result: SyncResult = await self.current_sync_for_user(
347352
sync_config, since_token, full_state=full_state
348353
)
349354
else:
350355

351-
def current_sync_callback(before_token, after_token):
352-
return self.current_sync_for_user(sync_config, since_token)
356+
async def current_sync_callback(before_token, after_token) -> SyncResult:
357+
return await self.current_sync_for_user(sync_config, since_token)
353358

354359
result = await self.notifier.wait_for_events(
355360
sync_config.user.to_string(),
@@ -358,6 +363,17 @@ def current_sync_callback(before_token, after_token):
358363
from_token=since_token,
359364
)
360365

366+
# if nothing has happened in any of the users' rooms since /sync was called,
367+
# the resultant next_batch will be the same as since_token (since the result
368+
# is generated when wait_for_events is first called, and not regenerated
369+
# when wait_for_events times out).
370+
#
371+
# If that happens, we mustn't cache it, so that when the client comes back
372+
# with the same cache token, we don't immediately return the same empty
373+
# result, causing a tightloop. (#8518)
374+
if result.next_batch == since_token:
375+
cache_context.should_cache = False
376+
361377
if result:
362378
if sync_config.filter_collection.lazy_load_members():
363379
lazy_loaded = "true"

synapse/python_dependencies.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,9 @@
7575
"phonenumbers>=8.2.0",
7676
# we use GaugeHistogramMetric, which was added in prom-client 0.4.0.
7777
"prometheus_client>=0.4.0",
78-
# we use attr.validators.deep_iterable, which arrived in 19.1.0 (Note:
79-
# Fedora 31 only has 19.1, so if we want to upgrade we should wait until 33
80-
# is out in November.)
78+
# we use `order`, which arrived in attrs 19.2.0.
8179
# Note: 21.1.0 broke `/sync`, see #9936
82-
"attrs>=19.1.0,!=21.1.0",
80+
"attrs>=19.2.0,!=21.1.0",
8381
"netaddr>=0.7.18",
8482
"Jinja2>=2.9",
8583
"bleach>=1.4.3",

synapse/types.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def f2(m):
404404
return username.decode("ascii")
405405

406406

407-
@attr.s(frozen=True, slots=True, cmp=False)
407+
@attr.s(frozen=True, slots=True, order=False)
408408
class RoomStreamToken:
409409
"""Tokens are positions between events. The token "s1" comes after event 1.
410410

tests/rest/client/v2_alpha/test_sync.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,3 +558,53 @@ def _check_unread_count(self, expected_count: int):
558558

559559
# Store the next batch for the next request.
560560
self.next_batch = channel.json_body["next_batch"]
561+
562+
563+
class SyncCacheTestCase(unittest.HomeserverTestCase):
564+
servlets = [
565+
synapse.rest.admin.register_servlets,
566+
login.register_servlets,
567+
sync.register_servlets,
568+
]
569+
570+
def test_noop_sync_does_not_tightloop(self):
571+
"""If the sync times out, we shouldn't cache the result
572+
573+
Essentially a regression test for #8518.
574+
"""
575+
self.user_id = self.register_user("kermit", "monkey")
576+
self.tok = self.login("kermit", "monkey")
577+
578+
# we should immediately get an initial sync response
579+
channel = self.make_request("GET", "/sync", access_token=self.tok)
580+
self.assertEqual(channel.code, 200, channel.json_body)
581+
582+
# now, make an incremental sync request, with a timeout
583+
next_batch = channel.json_body["next_batch"]
584+
channel = self.make_request(
585+
"GET",
586+
f"/sync?since={next_batch}&timeout=10000",
587+
access_token=self.tok,
588+
await_result=False,
589+
)
590+
# that should block for 10 seconds
591+
with self.assertRaises(TimedOutException):
592+
channel.await_result(timeout_ms=9900)
593+
channel.await_result(timeout_ms=200)
594+
self.assertEqual(channel.code, 200, channel.json_body)
595+
596+
# we expect the next_batch in the result to be the same as before
597+
self.assertEqual(channel.json_body["next_batch"], next_batch)
598+
599+
# another incremental sync should also block.
600+
channel = self.make_request(
601+
"GET",
602+
f"/sync?since={next_batch}&timeout=10000",
603+
access_token=self.tok,
604+
await_result=False,
605+
)
606+
# that should block for 10 seconds
607+
with self.assertRaises(TimedOutException):
608+
channel.await_result(timeout_ms=9900)
609+
channel.await_result(timeout_ms=200)
610+
self.assertEqual(channel.code, 200, channel.json_body)

tests/server.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,21 +138,19 @@ def isSecure(self):
138138
def transport(self):
139139
return self
140140

141-
def await_result(self, timeout: int = 100) -> None:
141+
def await_result(self, timeout_ms: int = 1000) -> None:
142142
"""
143143
Wait until the request is finished.
144144
"""
145+
end_time = self._reactor.seconds() + timeout_ms / 1000.0
145146
self._reactor.run()
146-
x = 0
147147

148148
while not self.is_finished():
149149
# If there's a producer, tell it to resume producing so we get content
150150
if self._producer:
151151
self._producer.resumeProducing()
152152

153-
x += 1
154-
155-
if x > timeout:
153+
if self._reactor.seconds() > end_time:
156154
raise TimedOutException("Timed out waiting for request to finish.")
157155

158156
self._reactor.advance(0.1)

0 commit comments

Comments
 (0)