Skip to content
This repository was archived by the owner on Apr 26, 2024. It is now read-only.

Commit 38a6d3e

Browse files
Add basic opentracing support (#5544)
* Configure and initialise tracer Includes config options for the tracer and sets up JaegerClient. * Scope manager using LogContexts We piggy-back our tracer scopes by using log context. The current log context gives us the current scope. If new scope is created we create a stack of scopes in the context. * jaeger is a dependency now * Carrier inject and extraction for Twisted Headers * Trace federation requests on the way in and out. The span is created in _started_processing and closed in _finished_processing because we need a meaningful log context. * Create logcontext for new scope. Instead of having a stack of scopes in a logcontext we create a new context for a new scope if the current logcontext already has a scope. * Remove scope from logcontext if logcontext is top level * Disable tracer if not configured * typo * Remove dependence on jaeger internals * bools * Set service name * :Explicitely state that the tracer is disabled * Black is the new black * Newsfile * Code style * Use the new config setup. * Generate config. * Copyright * Rename config to opentracing * Remove user whitelisting * Empty whitelist by default * User ConfigError instead of RuntimeError * Use isinstance * Use tag constants for opentracing. * Remove debug comment and no need to explicitely record error * Two errors a "s(c)entry" * Docstrings! * Remove debugging brainslip * Homeserver Whitlisting * Better opentracing config comment * linting * Inclue worker name in service_name * Make opentracing an optional dependency * Neater config retreival * Clean up dummy tags * Instantiate tracing as object instead of global class * Inlcude opentracing as a homeserver member. * Thread opentracing to the request level * Reference opetnracing through hs * Instantiate dummy opentracin g for tests. 
* About to revert, just keeping the unfinished changes just in case * Revert back to global state, commit number: 9ce4a3d * Use class level methods in tracerutils * Start and stop requests spans in a place where we have access to the authenticated entity * Seen it, isort it * Make sure to close the active span. * I'm getting black and blue from this. * Logger formatting Co-Authored-By: Erik Johnston <[email protected]> * Outdated comment * Import opentracing at the top * Return a contextmanager * Start tracing client requests from the servlet * Return noop context manager if not tracing * Explicitely say that these are federation requests * Include servlet name in client requests * Use context manager * Move opentracing to logging/ * Seen it, isort it again! * Ignore twisted return exceptions on context exit * Escape the scope * Scopes should be entered to make them useful. * Nicer decorator names * Just one init, init? * Don't need to close something that isn't open * Docs make you smarter
1 parent 1890cfc commit 38a6d3e

File tree

12 files changed

+633
-12
lines changed

12 files changed

+633
-12
lines changed

changelog.d/5544.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added opentracing and configuration options.

docs/sample_config.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,3 +1395,20 @@ password_config:
13951395
# module: "my_custom_project.SuperRulesSet"
13961396
# config:
13971397
# example_option: 'things'
1398+
1399+
1400+
## Opentracing ##
1401+
# These settings enable opentracing which implements distributed tracing
1402+
# This allows you to observe the causal chain of events across servers
1403+
# including requests, key lookups etc. across any server running
1404+
# synapse or any other other services which supports opentracing.
1405+
# (specifically those implemented with jaeger)
1406+
1407+
#opentracing:
1408+
# # Enable / disable tracer
1409+
# tracer_enabled: false
1410+
# # The list of homeservers we wish to expose our current traces to.
1411+
# # The list is a list of regexes which are matched against the
1412+
# # servername of the homeserver
1413+
# homeserver_whitelist:
1414+
# - ".*"

synapse/app/_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,9 @@ def handle_sighup(*args, **kwargs):
243243
# Load the certificate from disk.
244244
refresh_certificate(hs)
245245

246+
# Start the tracer
247+
synapse.logging.opentracing.init_tracer(hs.config)
248+
246249
# It is now safe to start your Synapse.
247250
hs.start_listening(listeners)
248251
hs.get_datastore().start_profiling()

synapse/config/homeserver.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .stats import StatsConfig
4141
from .third_party_event_rules import ThirdPartyRulesConfig
4242
from .tls import TlsConfig
43+
from .tracer import TracerConfig
4344
from .user_directory import UserDirectoryConfig
4445
from .voip import VoipConfig
4546
from .workers import WorkerConfig
@@ -75,5 +76,6 @@ class HomeServerConfig(
7576
ServerNoticesConfig,
7677
RoomDirectoryConfig,
7778
ThirdPartyRulesConfig,
79+
TracerConfig,
7880
):
7981
pass

synapse/config/tracer.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2019 The Matrix.org Foundation C.I.C.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
from ._base import Config, ConfigError
17+
18+
19+
class TracerConfig(Config):
    """Config section controlling the optional opentracing integration."""

    def read_config(self, config, **kwargs):
        """Load and validate the ``opentracing`` section of the config.

        The section is stored on ``self.tracer_config``. If the section is
        missing or empty, the tracer is treated as disabled.

        Args:
            config (dict): the parsed homeserver config.
            **kwargs: unused; accepted for compatibility with other config
                sections.

        Raises:
            ConfigError: if the section is not a mapping, or if the tracer is
                enabled and ``homeserver_whitelist`` is not a list.
        """
        tracer_config = config.get("opentracing")

        # The key may be present with no value (e.g. a bare `opentracing:`
        # line), in which case `get` hands back None rather than a default.
        # Treat that the same as the section being absent.
        if tracer_config is None:
            tracer_config = {"tracer_enabled": False}

        # Fail with a config error rather than an AttributeError further down
        # if the section is a scalar or a list.
        if not isinstance(tracer_config, dict):
            raise ConfigError("Tracer opentracing config is malformed")

        self.tracer_config = tracer_config

        if tracer_config.get("tracer_enabled", False):
            # The tracer is enabled so sanitize the config.
            # If no whitelist is given, default to exposing traces to nobody.
            tracer_config.setdefault("homeserver_whitelist", [])

            if not isinstance(tracer_config.get("homeserver_whitelist"), list):
                raise ConfigError("Tracer homeserver_whitelist config is malformed")

    def generate_config_section(cls, **kwargs):
        """Return the commented-out sample config text for this section."""
        return """\
        ## Opentracing ##
        # These settings enable opentracing which implements distributed tracing
        # This allows you to observe the causal chain of events across servers
        # including requests, key lookups etc. across any server running
        # synapse or any other other services which supports opentracing.
        # (specifically those implemented with jaeger)

        #opentracing:
        # # Enable / disable tracer
        # tracer_enabled: false
        # # The list of homeservers we wish to expose our current traces to.
        # # The list is a list of regexes which are matched against the
        # # servername of the homeserver
        # homeserver_whitelist:
        # - ".*"
        """

synapse/federation/transport/server.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from twisted.internet import defer
2222

2323
import synapse
24+
import synapse.logging.opentracing as opentracing
2425
from synapse.api.errors import Codes, FederationDeniedError, SynapseError
2526
from synapse.api.room_versions import RoomVersions
2627
from synapse.api.urls import (
@@ -288,14 +289,29 @@ def new_func(request, *args, **kwargs):
288289
logger.warn("authenticate_request failed: %s", e)
289290
raise
290291

291-
if origin:
292-
with ratelimiter.ratelimit(origin) as d:
293-
yield d
292+
# Start an opentracing span
293+
with opentracing.start_active_span_from_context(
294+
request.requestHeaders,
295+
"incoming-federation-request",
296+
tags={
297+
"request_id": request.get_request_id(),
298+
opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_SERVER,
299+
opentracing.tags.HTTP_METHOD: request.get_method(),
300+
opentracing.tags.HTTP_URL: request.get_redacted_uri(),
301+
opentracing.tags.PEER_HOST_IPV6: request.getClientIP(),
302+
"authenticated_entity": origin,
303+
},
304+
):
305+
if origin:
306+
with ratelimiter.ratelimit(origin) as d:
307+
yield d
308+
response = yield func(
309+
origin, content, request.args, *args, **kwargs
310+
)
311+
else:
294312
response = yield func(
295313
origin, content, request.args, *args, **kwargs
296314
)
297-
else:
298-
response = yield func(origin, content, request.args, *args, **kwargs)
299315

300316
defer.returnValue(response)
301317

synapse/http/matrixfederationclient.py

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from twisted.web._newclient import ResponseDone
3737
from twisted.web.http_headers import Headers
3838

39+
import synapse.logging.opentracing as opentracing
3940
import synapse.metrics
4041
import synapse.util.retryutils
4142
from synapse.api.errors import (
@@ -339,9 +340,25 @@ def _send_request(
339340
else:
340341
query_bytes = b""
341342

342-
headers_dict = {b"User-Agent": [self.version_string_bytes]}
343+
# Start a new span for this outgoing federation request
344+
scope = opentracing.start_active_span(
345+
"outgoing-federation-request",
346+
tags={
347+
opentracing.tags.SPAN_KIND: opentracing.tags.SPAN_KIND_RPC_CLIENT,
348+
opentracing.tags.PEER_ADDRESS: request.destination,
349+
opentracing.tags.HTTP_METHOD: request.method,
350+
opentracing.tags.HTTP_URL: request.path,
351+
},
352+
finish_on_close=True,
353+
)
354+
355+
# Inject the span into the headers
356+
headers_dict = {}
357+
opentracing.inject_active_span_byte_dict(headers_dict, request.destination)
343358

344-
with limiter:
359+
headers_dict[b"User-Agent"] = [self.version_string_bytes]
360+
361+
with limiter, scope:
345362
# XXX: Would be much nicer to retry only at the transaction-layer
346363
# (once we have reliable transactions in place)
347364
if long_retries:
@@ -419,6 +436,10 @@ def _send_request(
419436
response.phrase.decode("ascii", errors="replace"),
420437
)
421438

439+
opentracing.set_tag(
440+
opentracing.tags.HTTP_STATUS_CODE, response.code
441+
)
442+
422443
if 200 <= response.code < 300:
423444
pass
424445
else:
@@ -499,8 +520,7 @@ def _send_request(
499520
_flatten_response_never_received(e),
500521
)
501522
raise
502-
503-
defer.returnValue(response)
523+
defer.returnValue(response)
504524

505525
def build_auth_headers(
506526
self, destination, method, url_bytes, content=None, destination_is=None

synapse/http/servlet.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from canonicaljson import json
2121

2222
from synapse.api.errors import Codes, SynapseError
23+
from synapse.logging.opentracing import trace_servlet
2324

2425
logger = logging.getLogger(__name__)
2526

@@ -290,7 +291,11 @@ def register(self, http_server):
290291
for method in ("GET", "PUT", "POST", "OPTIONS", "DELETE"):
291292
if hasattr(self, "on_%s" % (method,)):
292293
method_handler = getattr(self, "on_%s" % (method,))
293-
http_server.register_paths(method, patterns, method_handler)
294+
http_server.register_paths(
295+
method,
296+
patterns,
297+
trace_servlet(self.__class__.__name__, method_handler),
298+
)
294299

295300
else:
296301
raise NotImplementedError("RestServlet must register something.")

synapse/logging/context.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,7 @@ class LoggingContext(object):
186186
"alive",
187187
"request",
188188
"tag",
189+
"scope",
189190
]
190191

191192
thread_local = threading.local()
@@ -238,6 +239,7 @@ def __init__(self, name=None, parent_context=None, request=None):
238239
self.request = None
239240
self.tag = ""
240241
self.alive = True
242+
self.scope = None
241243

242244
self.parent_context = parent_context
243245

@@ -322,10 +324,12 @@ def copy_to(self, record):
322324
another LoggingContext
323325
"""
324326

325-
# 'request' is the only field we currently use in the logger, so that's
326-
# all we need to copy
327+
# we track the current request
327328
record.request = self.request
328329

330+
# we also track the current scope:
331+
record.scope = self.scope
332+
329333
def start(self):
330334
if get_thread_id() != self.main_thread:
331335
logger.warning("Started logcontext %s on different thread", self)

0 commit comments

Comments
 (0)