This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit ab737dd

Merge remote-tracking branch 'origin/develop' into rav/clean_up_event_edges

2 parents: c29b21b + f1605b7

File tree: 4 files changed, +36 -27 lines

changelog.d/12886.misc (1 addition, 0 deletions)

```diff
@@ -0,0 +1 @@
+Refactor `have_seen_events` to reduce memory consumed when processing federation traffic.
```

changelog.d/12889.bugfix (1 addition, 0 deletions)

```diff
@@ -0,0 +1 @@
+Fix a bug introduced in Synapse 1.59.0 which caused room deletion to fail with a foreign key violation.
```

synapse/storage/databases/main/events_worker.py (24 additions, 18 deletions)

```diff
@@ -1356,14 +1356,23 @@ async def have_seen_events(
         Returns:
             The set of events we have already seen.
         """
-        res = await self._have_seen_events_dict(
-            (room_id, event_id) for event_id in event_ids
-        )
-        return {eid for ((_rid, eid), have_event) in res.items() if have_event}
+
+        # @cachedList chomps lots of memory if you call it with a big list, so
+        # we break it down. However, each batch requires its own index scan, so we make
+        # the batches as big as possible.
+
+        results: Set[str] = set()
+        for chunk in batch_iter(event_ids, 500):
+            r = await self._have_seen_events_dict(
+                [(room_id, event_id) for event_id in chunk]
+            )
+            results.update(eid for ((_rid, eid), have_event) in r.items() if have_event)
+
+        return results
 
     @cachedList(cached_method_name="have_seen_event", list_name="keys")
     async def _have_seen_events_dict(
-        self, keys: Iterable[Tuple[str, str]]
+        self, keys: Collection[Tuple[str, str]]
     ) -> Dict[Tuple[str, str], bool]:
         """Helper for have_seen_events
 
```
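The chunking above leans on a batching helper. As a rough sketch (Synapse's own version lives in `synapse.util.iterutils`; equivalent behaviour is assumed here), `batch_iter` lazily slices an iterable into fixed-size tuples:

```python
# Minimal sketch of the batching helper, assuming it matches
# synapse.util.iterutils.batch_iter in behaviour.
from itertools import islice
from typing import Iterable, Iterator, Tuple, TypeVar

T = TypeVar("T")


def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T, ...]]:
    """Yield tuples of at most `size` items until the iterable is exhausted."""
    sourceiter = iter(iterable)
    # iter(callable, sentinel) keeps calling the lambda and stops when it
    # returns the sentinel, i.e. when islice produces an empty tuple.
    return iter(lambda: tuple(islice(sourceiter, size)), ())


# e.g. list(batch_iter(range(5), 2)) == [(0, 1), (2, 3), (4,)]
```

Each 500-key chunk then becomes one `_have_seen_events_dict` call, bounding per-call memory while still amortising the index scan. The second hunk, below, reworks `_have_seen_events_dict` itself: with the chunking moved to the caller, each invocation can query all of its remaining keys in a single transaction.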
```diff
@@ -1375,33 +1384,30 @@ async def _have_seen_events_dict(
         cache_results = {
             (rid, eid) for (rid, eid) in keys if self._get_event_cache.contains((eid,))
         }
-        results = {x: True for x in cache_results}
+        results = dict.fromkeys(cache_results, True)
+        remaining = [k for k in keys if k not in cache_results]
+        if not remaining:
+            return results
 
-        def have_seen_events_txn(
-            txn: LoggingTransaction, chunk: Tuple[Tuple[str, str], ...]
-        ) -> None:
+        def have_seen_events_txn(txn: LoggingTransaction) -> None:
             # we deliberately do *not* query the database for room_id, to make the
             # query an index-only lookup on `events_event_id_key`.
             #
             # We therefore pull the events from the database into a set...
 
             sql = "SELECT event_id FROM events AS e WHERE "
             clause, args = make_in_list_sql_clause(
-                txn.database_engine, "e.event_id", [eid for (_rid, eid) in chunk]
+                txn.database_engine, "e.event_id", [eid for (_rid, eid) in remaining]
             )
             txn.execute(sql + clause, args)
             found_events = {eid for eid, in txn}
 
-            # ... and then we can update the results for each row in the batch
-            results.update({(rid, eid): (eid in found_events) for (rid, eid) in chunk})
-
-        # each batch requires its own index scan, so we make the batches as big as
-        # possible.
-        for chunk in batch_iter((k for k in keys if k not in cache_results), 500):
-            await self.db_pool.runInteraction(
-                "have_seen_events", have_seen_events_txn, chunk
+            # ... and then we can update the results for each key
+            results.update(
+                {(rid, eid): (eid in found_events) for (rid, eid) in remaining}
             )
 
+        await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn)
         return results
 
     @cached(max_entries=100000, tree=True)
```
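The transaction builds its WHERE clause with `make_in_list_sql_clause`. A simplified sketch of what that expands to (names and behaviour here are assumptions for illustration; Synapse's real helper can instead emit a Postgres `= ANY(?)` array form depending on the database engine):

```python
# Hypothetical simplification of make_in_list_sql_clause: only the generic
# "column IN (?, ?, ...)" form is shown here.
from typing import Any, Iterable, List, Tuple


def make_in_list_sql_clause_sketch(
    column: str, values: Iterable[Any]
) -> Tuple[str, List[Any]]:
    vals = list(values)
    assert vals, "an empty IN () list is not valid SQL"
    placeholders = ", ".join("?" for _ in vals)
    return f"{column} IN ({placeholders})", vals


# e.g. for the query above:
# clause, args = make_in_list_sql_clause_sketch("e.event_id", ["$a", "$b"])
# -> ("e.event_id IN (?, ?)", ["$a", "$b"])
```

As the diff's comment notes, selecting only `event_id` (and never `room_id`) lets the query be answered entirely from the `events_event_id_key` index, without touching the table rows themselves.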

synapse/storage/databases/main/purge_events.py (10 additions, 9 deletions)

```diff
@@ -322,12 +322,7 @@ async def purge_room(self, room_id: str) -> List[int]:
         )
 
     def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]:
-        # We *immediately* delete the room from the rooms table. This ensures
-        # that we don't race when persisting events (as that transaction checks
-        # that the room exists).
-        txn.execute("DELETE FROM rooms WHERE room_id = ?", (room_id,))
-
-        # Next, we fetch all the state groups that should be deleted, before
+        # First, fetch all the state groups that should be deleted, before
         # we delete that information.
         txn.execute(
             """
@@ -387,16 +382,21 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]:
             (room_id,),
         )
 
-        # and finally, the tables with an index on room_id (or no useful index)
+        # next, the tables with an index on room_id (or no useful index)
         for table in (
             "current_state_events",
             "destination_rooms",
             "event_backward_extremities",
             "event_forward_extremities",
             "event_push_actions",
             "event_search",
+            "partial_state_events",
             "events",
+            "federation_inbound_events_staging",
             "group_rooms",
+            "local_current_membership",
+            "partial_state_rooms_servers",
+            "partial_state_rooms",
             "receipts_graph",
             "receipts_linearized",
             "room_aliases",
@@ -416,8 +416,9 @@ def _purge_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[int]:
             "group_summary_rooms",
             "room_account_data",
             "room_tags",
-            "local_current_membership",
-            "federation_inbound_events_staging",
+            # "rooms" happens last, to keep the foreign keys in the other tables
+            # happy
+            "rooms",
         ):
             logger.info("[purge] removing %s from %s", room_id, table)
             txn.execute("DELETE FROM %s WHERE room_id=?" % (table,), (room_id,))
```
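The reordering matters because some of these tables carry foreign keys pointing at `rooms`: deleting the parent row first is exactly the failure mode the 12889 changelog entry describes. A self-contained illustration of the principle (hypothetical two-table schema, not Synapse's real one):

```python
# Hypothetical schema showing why "rooms" must be purged last: deleting the
# parent row while child rows still reference it violates the foreign key.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("PRAGMA foreign_keys = ON")  # SQLite enforces FKs only when asked
conn.execute("CREATE TABLE rooms (room_id TEXT PRIMARY KEY)")
conn.execute(
    "CREATE TABLE events (event_id TEXT PRIMARY KEY,"
    " room_id TEXT REFERENCES rooms(room_id))"
)
conn.execute("INSERT INTO rooms VALUES ('!r:example.org')")
conn.execute("INSERT INTO events VALUES ('$e1', '!r:example.org')")

try:
    # parent first: fails, because a child row still points at it
    conn.execute("DELETE FROM rooms WHERE room_id = ?", ("!r:example.org",))
except sqlite3.IntegrityError as exc:
    print("parent-first delete failed:", exc)

# children first, parent last: succeeds
conn.execute("DELETE FROM events WHERE room_id = ?", ("!r:example.org",))
conn.execute("DELETE FROM rooms WHERE room_id = ?", ("!r:example.org",))
```

This is also why the old behaviour of deleting from `rooms` *immediately* (to avoid racing with event persistence) had to go: with the new partial-state tables referencing `rooms`, the early delete tripped the constraint.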
