Skip to content

Commit bc309be

Browse files
richvdhazmeuk
authored andcommitted
Fix infinite loop in partial-state resync (matrix-org#13353)
Make sure that we only pull out events from the db once they have no prev-events with partial state.
1 parent 797e898 commit bc309be

File tree

3 files changed

+27
-8
lines changed

3 files changed

+27
-8
lines changed

changelog.d/13353.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a bug in the experimental faster-room-joins support which could cause it to get stuck in an infinite loop.

synapse/handlers/federation_event.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -569,15 +569,15 @@ async def update_state_for_partial_state_event(
569569

570570
if context is None or context.partial_state:
571571
# this can happen if some or all of the event's prev_events still have
572-
# partial state - ie, an event has an earlier stream_ordering than one
573-
# or more of its prev_events, so we de-partial-state it before its
574-
# prev_events.
572+
# partial state. We were careful to only pick events from the db without
573+
# partial-state prev events, so that implies that a prev event has
574+
# been persisted (with partial state) since we did the query.
575575
#
576-
# TODO(faster_joins): we probably need to be more intelligent, and
577-
# exclude partial-state prev_events from consideration
578-
# https://github.com/matrix-org/synapse/issues/13001
576+
# So, let's just ignore `event` for now; when we re-run the db query
577+
# we should instead get its partial-state prev event, which we will
578+
# de-partial-state, and then come back to event.
579579
logger.warning(
580-
"%s still has partial state: can't de-partial-state it yet",
580+
"%s still has prev_events with partial state: can't de-partial-state it yet",
581581
event.event_id,
582582
)
583583
return

synapse/storage/databases/main/events_worker.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2110,11 +2110,29 @@ async def get_partial_state_events_batch(self, room_id: str) -> List[str]:
21102110
def _get_partial_state_events_batch_txn(
21112111
txn: LoggingTransaction, room_id: str
21122112
) -> List[str]:
2113+
# we want to work through the events from oldest to newest, so
2114+
# we only want events whose prev_events do *not* have partial state - hence
2115+
# the 'NOT EXISTS' clause in the below.
2116+
#
2117+
# This is necessary because ordering by stream ordering isn't quite enough
2118+
# to ensure that we work from oldest to newest event (in particular,
2119+
# if an event is initially persisted as an outlier and later de-outliered,
2120+
# it can end up with a lower stream_ordering than its prev_events).
2121+
#
2122+
# Typically this means we'll only return one event per batch, but that's
2123+
# hard to do much about.
2124+
#
2125+
# See also: https://github.com/matrix-org/synapse/issues/13001
21132126
txn.execute(
21142127
"""
21152128
SELECT event_id FROM partial_state_events AS pse
21162129
JOIN events USING (event_id)
2117-
WHERE pse.room_id = ?
2130+
WHERE pse.room_id = ? AND
2131+
NOT EXISTS(
2132+
SELECT 1 FROM event_edges AS ee
2133+
JOIN partial_state_events AS prev_pse ON (prev_pse.event_id=ee.prev_event_id)
2134+
WHERE ee.event_id=pse.event_id
2135+
)
21182136
ORDER BY events.stream_ordering
21192137
LIMIT 100
21202138
""",

0 commit comments

Comments
 (0)