Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit c737744

Browse files
authored
Fix bug in device list caching when remote users leave rooms (#13749)
When a remote user leaves the last room shared with the homeserver, we have to mark their device list as unsubscribed, otherwise we would hold on to a stale device list in our cache. Crucially, the device list would remain cached even after the remote user rejoined the room, which could lead to E2EE failures until the next change to the remote user's device list. Fixes #13651. Signed-off-by: Sean Quah <[email protected]>
1 parent 21687ec commit c737744

File tree

5 files changed

+51
-15
lines changed

5 files changed

+51
-15
lines changed

changelog.d/13749.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a long standing bug where device lists would remain cached when remote users left and rejoined the last room shared with the local homeserver.

synapse/handlers/device.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@
4545
JsonDict,
4646
StreamKeyType,
4747
StreamToken,
48-
UserID,
4948
get_domain_from_id,
5049
get_verify_key_from_cross_signing_key,
5150
)
@@ -324,8 +323,6 @@ def __init__(self, hs: "HomeServer"):
324323
self.device_list_updater.incoming_device_list_update,
325324
)
326325

327-
hs.get_distributor().observe("user_left_room", self.user_left_room)
328-
329326
# Whether `_handle_new_device_update_async` is currently processing.
330327
self._handle_new_device_update_is_processing = False
331328

@@ -569,14 +566,6 @@ async def notify_user_signature_update(
569566
StreamKeyType.DEVICE_LIST, position, users=[from_user_id]
570567
)
571568

572-
async def user_left_room(self, user: UserID, room_id: str) -> None:
573-
user_id = user.to_string()
574-
room_ids = await self.store.get_rooms_for_user(user_id)
575-
if not room_ids:
576-
# We no longer share rooms with this user, so we'll no longer
577-
# receive device updates. Mark this in DB.
578-
await self.store.mark_remote_user_device_list_as_unsubscribed(user_id)
579-
580569
async def store_dehydrated_device(
581570
self,
582571
user_id: str,

synapse/handlers/e2e_keys.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,32 @@ async def query_devices(
175175
user_ids_not_in_cache,
176176
remote_results,
177177
) = await self.store.get_user_devices_from_cache(query_list)
178+
179+
# Check that the homeserver still shares a room with all cached users.
180+
# Note that this check may be slightly racy when a remote user leaves a
181+
# room after we have fetched their cached device list. In the worst case
182+
# we will do extra federation queries for devices that we had cached.
183+
cached_users = set(remote_results.keys())
184+
valid_cached_users = (
185+
await self.store.get_users_server_still_shares_room_with(
186+
remote_results.keys()
187+
)
188+
)
189+
invalid_cached_users = cached_users - valid_cached_users
190+
if invalid_cached_users:
191+
# Fix up results. If we get here, there is either a bug in device
192+
# list tracking, or we hit the race mentioned above.
193+
user_ids_not_in_cache.update(invalid_cached_users)
194+
for invalid_user_id in invalid_cached_users:
195+
remote_results.pop(invalid_user_id)
196+
# This log message may be removed if it turns out it's almost
197+
# entirely triggered by races.
198+
logger.error(
199+
"Devices for %s were cached, but the server no longer shares "
200+
"any rooms with them. The cached device lists are stale.",
201+
invalid_cached_users,
202+
)
203+
178204
for user_id, devices in remote_results.items():
179205
user_devices = results.setdefault(user_id, {})
180206
for device_id, device in devices.items():

synapse/storage/controllers/persist_events.py

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -598,9 +598,9 @@ async def _persist_event_batch(
598598
# room
599599
state_delta_for_room: Dict[str, DeltaState] = {}
600600

601-
# Set of remote users which were in rooms the server has left. We
602-
# should check if we still share any rooms and if not we mark their
603-
# device lists as stale.
601+
# Set of remote users which were in rooms the server has left or who may
602+
# have left rooms the server is in. We should check if we still share any
603+
# rooms and if not we mark their device lists as stale.
604604
potentially_left_users: Set[str] = set()
605605

606606
if not backfilled:
@@ -725,6 +725,20 @@ async def _persist_event_batch(
725725
current_state = {}
726726
delta.no_longer_in_room = True
727727

728+
# Add all remote users that might have left rooms.
729+
potentially_left_users.update(
730+
user_id
731+
for event_type, user_id in delta.to_delete
732+
if event_type == EventTypes.Member
733+
and not self.is_mine_id(user_id)
734+
)
735+
potentially_left_users.update(
736+
user_id
737+
for event_type, user_id in delta.to_insert.keys()
738+
if event_type == EventTypes.Member
739+
and not self.is_mine_id(user_id)
740+
)
741+
728742
state_delta_for_room[room_id] = delta
729743

730744
await self.persist_events_store._persist_events_and_state_updates(

tests/handlers/test_e2e_keys.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -891,14 +891,20 @@ def test_query_all_devices_caches_result(self, device_ids: Iterable[str]) -> Non
891891
new_callable=mock.MagicMock,
892892
return_value=make_awaitable(["some_room_id"]),
893893
)
894+
mock_get_users = mock.patch.object(
895+
self.store,
896+
"get_users_server_still_shares_room_with",
897+
new_callable=mock.MagicMock,
898+
return_value=make_awaitable({remote_user_id}),
899+
)
894900
mock_request = mock.patch.object(
895901
self.hs.get_federation_client(),
896902
"query_user_devices",
897903
new_callable=mock.MagicMock,
898904
return_value=make_awaitable(response_body),
899905
)
900906

901-
with mock_get_rooms, mock_request as mocked_federation_request:
907+
with mock_get_rooms, mock_get_users, mock_request as mocked_federation_request:
902908
# Make the first query and sanity check it succeeds.
903909
response_1 = self.get_success(
904910
e2e_handler.query_devices(

0 commit comments

Comments
 (0)