Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 70a4317

Browse files
Track when the pulled event signature fails (#13815)
Because we're doing the recording in `_check_sigs_and_hash_for_pulled_events_and_fetch` (previously named `_check_sigs_and_hash_and_fetch`), this means we will track signature failures for `backfill`, `get_room_state`, `get_event_auth`, and `get_missing_events` (all pulled event scenarios). We also record signature failures from `get_pdu`. Part of #13700. Part of #13676 and #13356. This PR will be especially important for #13816 so we can avoid the costly `_get_state_ids_after_missing_prev_event` down the line when `/messages` calls backfill.
1 parent 92ae90a commit 70a4317

File tree

5 files changed

+140
-15
lines changed

5 files changed

+140
-15
lines changed

changelog.d/13815.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Keep track of when an event pulled over federation fails its signature check so we can intelligently back off in the future.

synapse/federation/federation_base.py

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
import logging
16-
from typing import TYPE_CHECKING
16+
from typing import TYPE_CHECKING, Awaitable, Callable, Optional
1717

1818
from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership
1919
from synapse.api.errors import Codes, SynapseError
@@ -58,7 +58,12 @@ def __init__(self, hs: "HomeServer"):
5858

5959
@trace
6060
async def _check_sigs_and_hash(
61-
self, room_version: RoomVersion, pdu: EventBase
61+
self,
62+
room_version: RoomVersion,
63+
pdu: EventBase,
64+
record_failure_callback: Optional[
65+
Callable[[EventBase, str], Awaitable[None]]
66+
] = None,
6267
) -> EventBase:
6368
"""Checks that event is correctly signed by the sending server.
6469
@@ -70,6 +75,11 @@ async def _check_sigs_and_hash(
7075
Args:
7176
room_version: The room version of the PDU
7277
pdu: the event to be checked
78+
record_failure_callback: A callback to run whenever the given event
79+
fails signature or hash checks. This includes exceptions
80+
that would normally be thrown/raised but also things like
81+
checking for event tampering where we just return the redacted
82+
event.
7383
7484
Returns:
7585
* the original event if the checks pass
@@ -80,7 +90,12 @@ async def _check_sigs_and_hash(
8090
InvalidEventSignatureError if the signature check failed. Nothing
8191
will be logged in this case.
8292
"""
83-
await _check_sigs_on_pdu(self.keyring, room_version, pdu)
93+
try:
94+
await _check_sigs_on_pdu(self.keyring, room_version, pdu)
95+
except InvalidEventSignatureError as exc:
96+
if record_failure_callback:
97+
await record_failure_callback(pdu, str(exc))
98+
raise exc
8499

85100
if not check_event_content_hash(pdu):
86101
# let's try to distinguish between failures because the event was
@@ -116,6 +131,10 @@ async def _check_sigs_and_hash(
116131
"event_id": pdu.event_id,
117132
}
118133
)
134+
if record_failure_callback:
135+
await record_failure_callback(
136+
pdu, "Event content has been tampered with"
137+
)
119138
return redacted_event
120139

121140
spam_check = await self.spam_checker.check_event_for_spam(pdu)

synapse/federation/federation_client.py

Lines changed: 40 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ async def backfill(
278278
pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus]
279279

280280
# Check signatures and hash of pdus, removing any from the list that fail checks
281-
pdus[:] = await self._check_sigs_and_hash_and_fetch(
281+
pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
282282
dest, pdus, room_version=room_version
283283
)
284284

@@ -328,7 +328,17 @@ async def get_pdu_from_destination_raw(
328328

329329
# Check signatures are correct.
330330
try:
331-
signed_pdu = await self._check_sigs_and_hash(room_version, pdu)
331+
332+
async def _record_failure_callback(
333+
event: EventBase, cause: str
334+
) -> None:
335+
await self.store.record_event_failed_pull_attempt(
336+
event.room_id, event.event_id, cause
337+
)
338+
339+
signed_pdu = await self._check_sigs_and_hash(
340+
room_version, pdu, _record_failure_callback
341+
)
332342
except InvalidEventSignatureError as e:
333343
errmsg = f"event id {pdu.event_id}: {e}"
334344
logger.warning("%s", errmsg)
@@ -547,24 +557,28 @@ async def get_room_state(
547557
len(auth_event_map),
548558
)
549559

550-
valid_auth_events = await self._check_sigs_and_hash_and_fetch(
560+
valid_auth_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
551561
destination, auth_event_map.values(), room_version
552562
)
553563

554-
valid_state_events = await self._check_sigs_and_hash_and_fetch(
555-
destination, state_event_map.values(), room_version
564+
valid_state_events = (
565+
await self._check_sigs_and_hash_for_pulled_events_and_fetch(
566+
destination, state_event_map.values(), room_version
567+
)
556568
)
557569

558570
return valid_state_events, valid_auth_events
559571

560572
@trace
561-
async def _check_sigs_and_hash_and_fetch(
573+
async def _check_sigs_and_hash_for_pulled_events_and_fetch(
562574
self,
563575
origin: str,
564576
pdus: Collection[EventBase],
565577
room_version: RoomVersion,
566578
) -> List[EventBase]:
567-
"""Checks the signatures and hashes of a list of events.
579+
"""
580+
Checks the signatures and hashes of a list of pulled events we got from
581+
federation and records any signature failures as failed pull attempts.
568582
569583
If a PDU fails its signature check then we check if we have it in
570584
the database, and if not then request it from the sender's server (if that
@@ -597,11 +611,17 @@ async def _check_sigs_and_hash_and_fetch(
597611

598612
valid_pdus: List[EventBase] = []
599613

614+
async def _record_failure_callback(event: EventBase, cause: str) -> None:
615+
await self.store.record_event_failed_pull_attempt(
616+
event.room_id, event.event_id, cause
617+
)
618+
600619
async def _execute(pdu: EventBase) -> None:
601620
valid_pdu = await self._check_sigs_and_hash_and_fetch_one(
602621
pdu=pdu,
603622
origin=origin,
604623
room_version=room_version,
624+
record_failure_callback=_record_failure_callback,
605625
)
606626

607627
if valid_pdu:
@@ -618,6 +638,9 @@ async def _check_sigs_and_hash_and_fetch_one(
618638
pdu: EventBase,
619639
origin: str,
620640
room_version: RoomVersion,
641+
record_failure_callback: Optional[
642+
Callable[[EventBase, str], Awaitable[None]]
643+
] = None,
621644
) -> Optional[EventBase]:
622645
"""Takes a PDU and checks its signatures and hashes.
623646
@@ -634,14 +657,21 @@ async def _check_sigs_and_hash_and_fetch_one(
634657
origin
635658
pdu
636659
room_version
660+
record_failure_callback: A callback to run whenever the given event
661+
fails signature or hash checks. This includes exceptions
662+
that would normally be thrown/raised but also things like
663+
checking for event tampering where we just return the redacted
664+
event.
637665
638666
Returns:
639667
The PDU (possibly redacted) if it has valid signatures and hashes.
640668
None if no valid copy could be found.
641669
"""
642670

643671
try:
644-
return await self._check_sigs_and_hash(room_version, pdu)
672+
return await self._check_sigs_and_hash(
673+
room_version, pdu, record_failure_callback
674+
)
645675
except InvalidEventSignatureError as e:
646676
logger.warning(
647677
"Signature on retrieved event %s was invalid (%s). "
@@ -694,7 +724,7 @@ async def get_event_auth(
694724

695725
auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]]
696726

697-
signed_auth = await self._check_sigs_and_hash_and_fetch(
727+
signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
698728
destination, auth_chain, room_version=room_version
699729
)
700730

@@ -1401,7 +1431,7 @@ async def get_missing_events(
14011431
event_from_pdu_json(e, room_version) for e in content.get("events", [])
14021432
]
14031433

1404-
signed_events = await self._check_sigs_and_hash_and_fetch(
1434+
signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch(
14051435
destination, events, room_version=room_version
14061436
)
14071437
except HttpResponseException as e:

tests/federation/test_federation_client.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,23 @@
2323

2424
from synapse.api.room_versions import RoomVersions
2525
from synapse.events import EventBase
26+
from synapse.rest import admin
27+
from synapse.rest.client import login, room
2628
from synapse.server import HomeServer
2729
from synapse.types import JsonDict
2830
from synapse.util import Clock
2931

32+
from tests.test_utils import event_injection
3033
from tests.unittest import FederatingHomeserverTestCase
3134

3235

3336
class FederationClientTest(FederatingHomeserverTestCase):
37+
servlets = [
38+
admin.register_servlets,
39+
room.register_servlets,
40+
login.register_servlets,
41+
]
42+
3443
def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer):
3544
super().prepare(reactor, clock, homeserver)
3645

@@ -231,6 +240,72 @@ def _get_pdu_once(self) -> EventBase:
231240

232241
return remote_pdu
233242

243+
def test_backfill_invalid_signature_records_failed_pull_attempts(
244+
self,
245+
) -> None:
246+
"""
247+
Test to make sure that events from /backfill with invalid signatures get
248+
recorded as failed pull attempts.
249+
"""
250+
OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}"
251+
main_store = self.hs.get_datastores().main
252+
253+
# Create the room
254+
user_id = self.register_user("kermit", "test")
255+
tok = self.login("kermit", "test")
256+
room_id = self.helper.create_room_as(room_creator=user_id, tok=tok)
257+
258+
# We purposely don't run `add_hashes_and_signatures_from_other_server`
259+
# over this because we want the signature check to fail.
260+
pulled_event, _ = self.get_success(
261+
event_injection.create_event(
262+
self.hs,
263+
room_id=room_id,
264+
sender=OTHER_USER,
265+
type="test_event_type",
266+
content={"body": "garply"},
267+
)
268+
)
269+
270+
# We expect an outbound request to /backfill, so stub that out
271+
self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed(
272+
_mock_response(
273+
{
274+
"origin": "yet.another.server",
275+
"origin_server_ts": 900,
276+
# Mimic the other server returning our new `pulled_event`
277+
"pdus": [pulled_event.get_pdu_json()],
278+
}
279+
)
280+
)
281+
282+
self.get_success(
283+
self.hs.get_federation_client().backfill(
284+
# We use "yet.another.server" instead of
285+
# `self.OTHER_SERVER_NAME` because we want to see the behavior
286+
# from `_check_sigs_and_hash_and_fetch_one` where it tries to
287+
# fetch the PDU again from the origin server if the signature
288+
# fails. Just want to make sure that the failure is counted from
289+
# both code paths.
290+
dest="yet.another.server",
291+
room_id=room_id,
292+
limit=1,
293+
extremities=[pulled_event.event_id],
294+
),
295+
)
296+
297+
# Make sure our failed pull attempt was recorded
298+
backfill_num_attempts = self.get_success(
299+
main_store.db_pool.simple_select_one_onecol(
300+
table="event_failed_pull_attempts",
301+
keyvalues={"event_id": pulled_event.event_id},
302+
retcol="num_attempts",
303+
)
304+
)
305+
# This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the
306+
# other from "yet.another.server"
307+
self.assertEqual(backfill_num_attempts, 2)
308+
234309

235310
def _mock_response(resp: JsonDict):
236311
body = json.dumps(resp).encode("utf-8")

tests/test_federation.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,8 +86,8 @@ async def _check_event_auth(origin, event, context):
8686

8787
federation_event_handler._check_event_auth = _check_event_auth
8888
self.client = self.homeserver.get_federation_client()
89-
self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed(
90-
pdus
89+
self.client._check_sigs_and_hash_for_pulled_events_and_fetch = (
90+
lambda dest, pdus, **k: succeed(pdus)
9191
)
9292

9393
# Send the join, it should return None (which is not an error)

0 commit comments

Comments
 (0)