Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 9e40503

Browse files
authored
Make opentracing trace into event persistence (#10134)
* Trace event persistence When we persist a batch of events, set the parent opentracing span to that from the request, so that we can trace all the way in. * changelog * When we force tracing, set a baggage item ... so that we can check again later. * Link in both directions between persist_events spans
1 parent d09e24a commit 9e40503

File tree

4 files changed

+99
-9
lines changed

4 files changed

+99
-9
lines changed

changelog.d/10134.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve OpenTracing for event persistence.

synapse/api/auth.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ async def get_user_by_req(
207207

208208
request.requester = user_id
209209
if user_id in self._force_tracing_for_users:
210-
opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
210+
opentracing.force_tracing()
211211
opentracing.set_tag("authenticated_entity", user_id)
212212
opentracing.set_tag("user_id", user_id)
213213
opentracing.set_tag("appservice_id", app_service.id)
@@ -260,7 +260,7 @@ async def get_user_by_req(
260260

261261
request.requester = requester
262262
if user_info.token_owner in self._force_tracing_for_users:
263-
opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
263+
opentracing.force_tracing()
264264
opentracing.set_tag("authenticated_entity", user_info.token_owner)
265265
opentracing.set_tag("user_id", user_info.user_id)
266266
if device_id:

synapse/logging/opentracing.py

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def set_fates(clotho, lachesis, atropos, father="Zues", mother="Themis"):
168168
import logging
169169
import re
170170
from functools import wraps
171-
from typing import TYPE_CHECKING, Dict, List, Optional, Pattern, Type
171+
from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Pattern, Type
172172

173173
import attr
174174

@@ -278,13 +278,19 @@ class SynapseTags:
278278
DB_TXN_ID = "db.txn_id"
279279

280280

281+
class SynapseBaggage:
282+
FORCE_TRACING = "synapse-force-tracing"
283+
284+
281285
# Block everything by default
282286
# A regex which matches the server_names to expose traces for.
283287
# None means 'block everything'.
284288
_homeserver_whitelist = None # type: Optional[Pattern[str]]
285289

286290
# Util methods
287291

292+
Sentinel = object()
293+
288294

289295
def only_if_tracing(func):
290296
"""Executes the function only if we're tracing. Otherwise returns None."""
@@ -447,12 +453,28 @@ def start_active_span(
447453
)
448454

449455

450-
def start_active_span_follows_from(operation_name, contexts):
456+
def start_active_span_follows_from(
457+
operation_name: str, contexts: Collection, inherit_force_tracing=False
458+
):
459+
"""Starts an active opentracing span, with additional references to previous spans
460+
461+
Args:
462+
operation_name: name of the operation represented by the new span
463+
contexts: the previous spans to inherit from
464+
inherit_force_tracing: if set, and any of the previous contexts have had tracing
465+
forced, the new span will also have tracing forced.
466+
"""
451467
if opentracing is None:
452468
return noop_context_manager()
453469

454470
references = [opentracing.follows_from(context) for context in contexts]
455471
scope = start_active_span(operation_name, references=references)
472+
473+
if inherit_force_tracing and any(
474+
is_context_forced_tracing(ctx) for ctx in contexts
475+
):
476+
force_tracing(scope.span)
477+
456478
return scope
457479

458480

@@ -551,6 +573,10 @@ def start_active_span_from_edu(
551573

552574

553575
# Opentracing setters for tags, logs, etc
576+
@only_if_tracing
577+
def active_span():
578+
"""Get the currently active span, if any"""
579+
return opentracing.tracer.active_span
554580

555581

556582
@ensure_active_span("set a tag")
@@ -571,6 +597,33 @@ def set_operation_name(operation_name):
571597
opentracing.tracer.active_span.set_operation_name(operation_name)
572598

573599

600+
@only_if_tracing
601+
def force_tracing(span=Sentinel) -> None:
602+
"""Force sampling for the active/given span and its children.
603+
604+
Args:
605+
span: span to force tracing for. By default, the active span.
606+
"""
607+
if span is Sentinel:
608+
span = opentracing.tracer.active_span
609+
if span is None:
610+
logger.error("No active span in force_tracing")
611+
return
612+
613+
span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1)
614+
615+
# also set a bit of baggage, so that we have a way of figuring out if
616+
# it is enabled later
617+
span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1")
618+
619+
620+
def is_context_forced_tracing(span_context) -> bool:
621+
"""Check if sampling has been forced for the given span context."""
622+
if span_context is None:
623+
return False
624+
return span_context.baggage.get(SynapseBaggage.FORCE_TRACING) is not None
625+
626+
574627
# Injection and extraction
575628

576629

synapse/storage/persist_events.py

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import logging
1919
from collections import deque
2020
from typing import (
21+
Any,
2122
Awaitable,
2223
Callable,
2324
Collection,
@@ -40,6 +41,7 @@
4041
from synapse.api.constants import EventTypes, Membership
4142
from synapse.events import EventBase
4243
from synapse.events.snapshot import EventContext
44+
from synapse.logging import opentracing
4345
from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
4446
from synapse.metrics.background_process_metrics import run_as_background_process
4547
from synapse.storage.databases import Databases
@@ -103,12 +105,18 @@
103105
)
104106

105107

106-
@attr.s(auto_attribs=True, frozen=True, slots=True)
108+
@attr.s(auto_attribs=True, slots=True)
107109
class _EventPersistQueueItem:
108110
events_and_contexts: List[Tuple[EventBase, EventContext]]
109111
backfilled: bool
110112
deferred: ObservableDeferred
111113

114+
parent_opentracing_span_contexts: List = []
115+
"""A list of opentracing spans waiting for this batch"""
116+
117+
opentracing_span_context: Any = None
118+
"""The opentracing span under which the persistence actually happened"""
119+
112120

113121
_PersistResult = TypeVar("_PersistResult")
114122

@@ -171,9 +179,27 @@ async def add_to_queue(
171179
)
172180
queue.append(end_item)
173181

182+
# add our events to the queue item
174183
end_item.events_and_contexts.extend(events_and_contexts)
184+
185+
# also add our active opentracing span to the item so that we get a link back
186+
span = opentracing.active_span()
187+
if span:
188+
end_item.parent_opentracing_span_contexts.append(span.context)
189+
190+
# start a processor for the queue, if there isn't one already
175191
self._handle_queue(room_id)
176-
return await make_deferred_yieldable(end_item.deferred.observe())
192+
193+
# wait for the queue item to complete
194+
res = await make_deferred_yieldable(end_item.deferred.observe())
195+
196+
# add another opentracing span which links to the persist trace.
197+
with opentracing.start_active_span_follows_from(
198+
"persist_event_batch_complete", (end_item.opentracing_span_context,)
199+
):
200+
pass
201+
202+
return res
177203

178204
def _handle_queue(self, room_id):
179205
"""Attempts to handle the queue for a room if not already being handled.
@@ -200,9 +226,17 @@ async def handle_queue_loop():
200226
queue = self._get_drainining_queue(room_id)
201227
for item in queue:
202228
try:
203-
ret = await self._per_item_callback(
204-
item.events_and_contexts, item.backfilled
205-
)
229+
with opentracing.start_active_span_follows_from(
230+
"persist_event_batch",
231+
item.parent_opentracing_span_contexts,
232+
inherit_force_tracing=True,
233+
) as scope:
234+
if scope:
235+
item.opentracing_span_context = scope.span.context
236+
237+
ret = await self._per_item_callback(
238+
item.events_and_contexts, item.backfilled
239+
)
206240
except Exception:
207241
with PreserveLoggingContext():
208242
item.deferred.errback()
@@ -252,6 +286,7 @@ def __init__(self, hs, stores: Databases):
252286
self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch)
253287
self._state_resolution_handler = hs.get_state_resolution_handler()
254288

289+
@opentracing.trace
255290
async def persist_events(
256291
self,
257292
events_and_contexts: Iterable[Tuple[EventBase, EventContext]],
@@ -307,6 +342,7 @@ async def enqueue(item):
307342
self.main_store.get_room_max_token(),
308343
)
309344

345+
@opentracing.trace
310346
async def persist_event(
311347
self, event: EventBase, context: EventContext, backfilled: bool = False
312348
) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]:

0 commit comments

Comments
 (0)