diff --git a/docs/user_guide/trace.md b/docs/user_guide/trace.md index afd56ee335..23d1c402d1 100644 --- a/docs/user_guide/trace.md +++ b/docs/user_guide/trace.md @@ -427,11 +427,38 @@ The meaning of the trace timestamps is: * BACKEND_OUTPUT: The tensor in the response of a backend. +## Tracing for BLS models +
+Triton does not collect traces for child models invoked from
+[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
+models by default.
+
+To include child models in collected traces, the user needs to provide the
+`trace` argument (as shown in the example below) when constructing an
+`InferenceRequest` object. Passing `request.trace()` lets Triton associate
+the child model's spans with the parent model's trace.
+
+```python
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    ...
+    def execute(self, requests):
+        ...
+        for request in requests:
+            ...
+            inference_request = pb_utils.InferenceRequest(
+                model_name='model_name',
+                requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
+                inputs=[], trace=request.trace())
+
+```
+
 ## OpenTelemetry trace support
 
-Triton provides an option to generate and export traces
-for standalone and ensemble models
-using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
+Triton provides an option to generate and export traces using
+[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
 
 To specify OpenTelemetry mode for tracing, specify the
 `--trace-config` flag as follows:
@@ -477,16 +504,30 @@ The following table shows available OpenTelemetry trace APIs settings for
 trace data.
+
+ resource
+ service.name=triton-inference-server
+
+ Key-value pairs to be used as resource attributes.
+ They should be specified using the following template:
+ --trace-config opentelemetry,resource=<key>=<value>
+ For example:
+ --trace-config opentelemetry,resource=service.name=triton
+ --trace-config opentelemetry,resource=service.version=1
+ Alternatively, key-value attributes can be specified through the
+ + OTEL_RESOURCE_ATTRIBUTES + environment variable. + + + ### Limitations - OpenTelemetry trace mode is not supported on Windows systems. -- Tracing [BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting) -models is not supported. - - Triton supports only [OTLP/HTTP Exporter](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp) and allows specification of only url for this exporter through diff --git a/qa/L0_trace/opentelemetry_unittest.py b/qa/L0_trace/opentelemetry_unittest.py index 6ca1c18f49..5055f4e88a 100644 --- a/qa/L0_trace/opentelemetry_unittest.py +++ b/qa/L0_trace/opentelemetry_unittest.py @@ -28,7 +28,7 @@ sys.path.append("../common") import json -import time +import re import unittest import numpy as np @@ -36,31 +36,67 @@ import tritonclient.grpc as grpcclient import tritonclient.http as httpclient -EXPECTED_NUM_SPANS = 10 +EXPECTED_NUM_SPANS = 16 +# OpenTelemetry OStream exporter sets `parent_span_id` to "0000000000000000", +# if current span is a root span, i.e. there is no parent span. +# https://github.com/open-telemetry/opentelemetry-cpp/blob/b7fd057185c4ed2dff507b859cbe058b7609fb4a/exporters/ostream/src/span_exporter.cc#L78C54-L78C68 +NO_PARENT_SPAN = "0000000000000000" class OpenTelemetryTest(tu.TestResultCollector): def setUp(self): - while True: - with open("trace_collector.log", "rt") as f: - data = f.read() - if data.count("resource_spans") != EXPECTED_NUM_SPANS: - time.sleep(5) - continue - else: - break - - data = data.split("\n") - full_spans = [ - entry.split("POST")[0] for entry in data if "resource_spans" in entry - ] - self.spans = [] - for span in full_spans: - span = json.loads(span) - self.spans.append(span["resource_spans"][0]["scope_spans"][0]["spans"][0]) + # Extracted spans are in json-like format, thus data needs to be + # post-processed, so that `json` could accept it for further + # processing + with open("trace_collector.log", "rt") as f: + data = f.read() + # Removing new lines and tabs around `{` + json_string = re.sub("\n\t{\n\t", "{", data) + # `resources` field is a dictionary, so adding `{` and`}` + # in the next 2 transformations, `instr-lib` is a next field, + # so whatever goes before it, belongs to `resources`. + json_string = re.sub( + "resources : \n\t", "resources : {\n\t", json_string + ) + json_string = re.sub( + "\n instr-lib :", "}\n instr-lib :", json_string + ) + # `json`` expects "key":"value" format, some fields in the + # data have empty string as value, so need to add `"",` + json_string = re.sub(": \n\t", ':"",', json_string) + json_string = re.sub(": \n", ':"",', json_string) + # Extracted data missing `,' after each key-value pair, + # which `json` exppects + json_string = re.sub("\n|\n\t", ",", json_string) + # Removing tabs + json_string = re.sub("\t", "", json_string) + # `json` expects each key and value have `"`'s, so adding them to + # every word/number/alpha-numeric entry + json_string = re.sub(r"\b([\w.-]+)\b", r'"\1"', json_string) + # `span kind`` represents one key + json_string = re.sub('"span" "kind"', '"span kind"', json_string) + # Removing extra `,` + json_string = re.sub("{,", "{", json_string) + json_string = re.sub(",}", "}", json_string) + # Adding `,` between dictionary entries + json_string = re.sub("}{", "},{", json_string) + # `events` is a list of dictionaries, `json` will accept it in the + # form of "events" : [{....}, {.....}, ...] 
+ json_string = re.sub( + '"events" : {', '"events" : [{', json_string + ) + # Closing `events`' list of dictionaries + json_string = re.sub('}, "links"', '}], "links"', json_string) + # Last 2 symbols are not needed + json_string = json_string[:-2] + # Since now `json_string` is a string, which represents dictionaries, + # we put it into one dictionary, so that `json` could read it as one. + json_string = '{ "spans" :[' + json_string + "] }" + self.spans = json.loads(json_string)["spans"] self.simple_model_name = "simple" self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32" + self.bls_model_name = "bls_simple" self.root_span = "InferRequest" def _check_events(self, span_name, events): @@ -121,12 +157,16 @@ def _check_parent(self, child_span, parent_span): # Check that child and parent span have the same trace_id # and child's `parent_span_id` is the same as parent's `span_id` self.assertEqual(child_span["trace_id"], parent_span["trace_id"]) - self.assertIn( - "parent_span_id", - child_span, + self.assertNotEqual( + child_span["parent_span_id"], + NO_PARENT_SPAN, "child span does not have parent span id specified", ) - self.assertEqual(child_span["parent_span_id"], parent_span["span_id"]) + self.assertEqual( + child_span["parent_span_id"], + parent_span["span_id"], + "child {} , parent {}".format(child_span, parent_span), + ) def test_spans(self): parsed_spans = [] @@ -134,19 +174,21 @@ def test_spans(self): # Check that collected spans have proper events recorded for span in self.spans: span_name = span["name"] - self._check_events(span_name, json.dumps(span["events"])) + self._check_events(span_name, str(span["events"])) parsed_spans.append(span_name) - # There should be 6 spans in total: - # 3 for http request, 3 for grpc request, 4 for ensemble - self.assertEqual(len(self.spans), 10) - # We should have 3 compute spans - self.assertEqual(parsed_spans.count("compute"), 3) - # 4 request spans (3 named simple - same as our model name, 1 ensemble) - self.assertEqual(parsed_spans.count(self.simple_model_name), 3) - self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1) - # 3 root spans - self.assertEqual(parsed_spans.count(self.root_span), 3) + # There should be 16 spans in total: + # 3 for http request, 3 for grpc request, 4 for ensemble, 6 for bls + self.assertEqual(len(self.spans), EXPECTED_NUM_SPANS) + # We should have 5 compute spans + self.assertEqual(parsed_spans.count("compute"), 5) + # 7 request spans + # (4 named simple - same as our model name, 2 ensemble, 1 bls) + self.assertEqual(parsed_spans.count(self.simple_model_name), 4) + self.assertEqual(parsed_spans.count(self.ensemble_model_name), 2) + self.assertEqual(parsed_spans.count(self.bls_model_name), 1) + # 4 root spans + self.assertEqual(parsed_spans.count(self.root_span), 4) def test_nested_spans(self): # First 3 spans in `self.spans` belong to HTTP request @@ -157,30 +199,33 @@ def test_nested_spans(self): for child, parent in zip(self.spans[:3], self.spans[1:3]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[2], "root span has a parent_span_id specified" - ) - # Next 3 spans in `self.spans` belong to GRPC request # Order of spans and their relationship described earlier for child, parent in zip(self.spans[3:6], self.spans[4:6]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[5], "root span has a parent_span_id specified" - 
) - - # Final 4 spans in `self.spans` belong to ensemble request + # Next 4 spans in `self.spans` belong to ensemble request # Order of spans: compute span - request span - request span - root span for child, parent in zip(self.spans[6:10], self.spans[7:10]): self._check_parent(child, parent) - # root_span should not have `parent_span_id` field - self.assertNotIn( - "parent_span_id", self.spans[9], "root span has a parent_span_id specified" - ) + # Final 6 spans in `self.spans` belong to bls with ensemble request + # Order of spans: + # compute span - request span (simple) - request span (ensemble)- + # - compute (for bls) - request (bls) - root span + # request span (ensemble) and compute (for bls) are children of + # request (bls) + children = self.spans[10:] + parents = (self.spans[11:13], self.spans[14], self.spans[14:]) + for child, parent in zip(children, parents[0]): + self._check_parent(child, parent) + + def test_resource_attributes(self): + for span in self.spans: + self.assertIn("test.key", span["resources"]) + self.assertEqual("test.value", span["resources"]["test.key"]) + self.assertIn("service.name", span["resources"]) + self.assertEqual("test_triton", span["resources"]["service.name"]) def prepare_data(client): @@ -214,6 +259,16 @@ def prepare_traces(): inputs = prepare_data(httpclient) triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs) + send_bls_request(model_name="ensemble_add_sub_int32_int32_int32") + + +def send_bls_request(model_name="simple"): + with httpclient.InferenceServerClient("localhost:8000") as client: + inputs = prepare_data(httpclient) + inputs.append(httpclient.InferInput("MODEL_NAME", [1], "BYTES")) + inputs[-1].set_data_from_numpy(np.array([model_name], dtype=np.object_)) + client.infer("bls_simple", inputs) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_trace/test.sh b/qa/L0_trace/test.sh index 984dcb04c4..8a084349fd 100755 --- a/qa/L0_trace/test.sh +++ b/qa/L0_trace/test.sh @@ -34,10 +34,6 @@ CLIENT_LOG="client.log" TEST_RESULT_FILE="test_results.txt" EXPECTED_NUM_TESTS="6" -TRACE_COLLECTOR=trace_collector.py -TRACE_COLLECTOR_LOG="trace_collector.log" -OTLP_PORT=10000 - REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} if [ "$#" -ge 1 ]; then REPO_VERSION=$1 @@ -55,6 +51,7 @@ export CUDA_VISIBLE_DEVICES=0 DATADIR=/data/inferenceserver/${REPO_VERSION}/qa_model_repository ENSEMBLEDIR=$DATADIR/../qa_ensemble_model_repository/qa_model_repository/ +BLSDIR=../python_models/bls_simple MODELBASE=onnx_int32_int32_int32 MODELSDIR=`pwd`/trace_models @@ -78,7 +75,8 @@ cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \ rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \ (cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \ sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \ - sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) + sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt) && \ + mkdir -p $MODELSDIR/bls_simple/1 && cp $BLSDIR/bls_simple.py $MODELSDIR/bls_simple/1/model.py RET=0 @@ -618,7 +616,7 @@ wait $SERVER_PID # Check `--trace-config` sets arguments properly -SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \ +SERVER_ARGS="--trace-config=triton,file=bls_trace.log --trace-config=level=TIMESTAMPS \ --trace-config=rate=4 --trace-config=count=6 --trace-config=mode=triton --model-repository=$MODELSDIR" SERVER_LOG="./inference_server_trace_config.log" run_server @@ -649,10 +647,17 @@ fi if [ `grep -c "\"log_frequency\":\"0\"" 
./curl.out` != "1" ]; then RET=1 fi -if [ `grep -c "\"trace_file\":\"some_file.log\"" ./curl.out` != "1" ]; then +if [ `grep -c "\"trace_file\":\"bls_trace.log\"" ./curl.out` != "1" ]; then RET=1 fi +set +e +# Send bls requests to make sure simple model is traced +for p in {1..4}; do + python -c 'import opentelemetry_unittest; \ + opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")' >> client_update.log 2>&1 +done + set -e kill $SERVER_PID @@ -660,26 +665,97 @@ wait $SERVER_PID set +e +$TRACE_SUMMARY -t bls_trace.log > summary_bls.log + +if [ `grep -c "COMPUTE_INPUT_END" summary_bls.log` != "2" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: Unexpected number of traced "COMPUTE_INPUT_END" events.\n***" + RET=1 +fi + +if [ `grep -c ^ensemble_add_sub_int32_int32_int32 summary_bls.log` != "1" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: BLS child ensemble model wasn't traced. \n***" + RET=1 +fi + +if [ `grep -c ^simple summary_bls.log` != "1" ]; then + cat summary_bls.log + echo -e "\n***\n*** Test Failed: ensemble's model 'simple' wasn't traced. \n***" + RET=1 +fi + +if [ `grep -o 'parent_id' bls_trace.log | wc -l` != "2" ]; then + cat bls_trace.log + echo -e "\n***\n*** Test Failed: Unexpected number of 'parent id' fields. \n***" + RET=1 +fi + # Check opentelemetry trace exporter sends proper info. # A helper python script starts listening on $OTLP_PORT, where # OTLP exporter sends traces. -# Unittests then check that produced spans have expected format and events -# FIXME: Redesign this test to remove time sensitivity +export TRITON_OPENTELEMETRY_TEST='false' +OTLP_PORT=10000 +OTEL_COLLECTOR_DIR=./opentelemetry-collector +OTEL_COLLECTOR=./opentelemetry-collector/bin/otelcorecol_* +OTEL_COLLECTOR_LOG="./trace_collector_http_exporter.log" + +# Building the latest version of the OpenTelemetry collector. +# Ref: https://opentelemetry.io/docs/collector/getting-started/#local +if [ -d "$OTEL_COLLECTOR_DIR" ]; then rm -Rf $OTEL_COLLECTOR_DIR; fi +git clone https://github.com/open-telemetry/opentelemetry-collector.git +cd $OTEL_COLLECTOR_DIR +make install-tools +make otelcorecol +cd .. +$OTEL_COLLECTOR --config ./trace-config.yaml >> $OTEL_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$! + + +SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \ + --trace-config=count=100 --trace-config=mode=opentelemetry \ + --trace-config=opentelemetry,url=localhost:$OTLP_PORT/v1/traces \ + --model-repository=$MODELSDIR" +SERVER_LOG="./inference_server_otel_http_exporter.log" + +run_server +if [ "$SERVER_PID" == "0" ]; then + echo -e "\n***\n*** Failed to start $SERVER\n***" + cat $SERVER_LOG + exit 1 +fi +$SIMPLE_HTTP_CLIENT >>$CLIENT_LOG 2>&1 + +set -e + +kill $SERVER_PID +wait $SERVER_PID + +kill $COLLECTOR_PID +wait $COLLECTOR_PID + +set +e + +if ! 
[[ -s $OTEL_COLLECTOR_LOG && `grep -c 'InstrumentationScope triton-server' $OTEL_COLLECTOR_LOG` == 3 ]] ; then + echo -e "\n***\n*** HTTP exporter test failed.\n***" + cat $OTEL_COLLECTOR_LOG + exit 1 +fi + + +# Unittests then check that produced spans have expected format and events OPENTELEMETRY_TEST=opentelemetry_unittest.py OPENTELEMETRY_LOG="opentelemetry_unittest.log" -EXPECTED_NUM_TESTS="2" +EXPECTED_NUM_TESTS="3" + +export TRITON_OPENTELEMETRY_TEST='true' SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \ --trace-config=count=100 --trace-config=mode=opentelemetry \ - --trace-config=opentelemetry,url=localhost:$OTLP_PORT \ + --trace-config=opentelemetry,resource=test.key=test.value \ + --trace-config=opentelemetry,resource=service.name=test_triton \ --model-repository=$MODELSDIR" -SERVER_LOG="./inference_server_trace_config.log" - -# Increasing OTLP timeout, since we don't use a valid OTLP collector -# and don't send a proper signal back. -export OTEL_EXPORTER_OTLP_TIMEOUT=50000 -export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=50000 +SERVER_LOG="./inference_server_otel_ostream_exporter.log" run_server if [ "$SERVER_PID" == "0" ]; then @@ -688,18 +764,29 @@ if [ "$SERVER_PID" == "0" ]; then exit 1 fi -# Using netcat as trace collector -apt-get update && apt-get install -y netcat -nc -l -k 127.0.0.1 $OTLP_PORT >> $TRACE_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$! - set +e # Preparing traces for unittest. -# Note: need to run this separately, to speed up trace collection. -# Otherwise internal (opentelemetry_unittest.OpenTelemetryTest.setUp) check -# will slow down collection. +# Note: running this separately, so that I could extract spans with `grep` +# from server log later. python -c 'import opentelemetry_unittest; \ opentelemetry_unittest.prepare_traces()' >>$CLIENT_LOG 2>&1 +sleep 5 + +set -e + +kill $SERVER_PID +wait $SERVER_PID + +set +e + +grep -z -o -P '({\n(?s).*}\n)' $SERVER_LOG >> trace_collector.log + +if ! [ -s trace_collector.log ] ; then + echo -e "\n***\n*** $SERVER_LOG did not contain any OpenTelemetry spans.\n***" + exit 1 +fi + # Unittest will not start until expected number of spans is collected. python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1 if [ $? -ne 0 ]; then @@ -714,12 +801,4 @@ else fi fi -kill $COLLECTOR_PID -wait $COLLECTOR_PID - -set -e - -kill $SERVER_PID -wait $SERVER_PID - exit $RET \ No newline at end of file diff --git a/qa/L0_trace/trace-config.yaml b/qa/L0_trace/trace-config.yaml new file mode 100644 index 0000000000..f8fe2424c0 --- /dev/null +++ b/qa/L0_trace/trace-config.yaml @@ -0,0 +1,45 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Simple config file for OpenTelemetry collector. +# It receives all traces, received on localhost:10000 and prints +# it into the output stream. +# Ref: https://opentelemetry.io/docs/collector/configuration/ +receivers: + otlp: + protocols: + http: + endpoint: 0.0.0.0:10000 + +exporters: + logging: + verbosity: detailed + +service: + pipelines: + traces: + receivers: [otlp] + exporters: [logging] diff --git a/qa/python_models/bls_simple/bls_simple.py b/qa/python_models/bls_simple/bls_simple.py new file mode 100644 index 0000000000..962c3834b9 --- /dev/null +++ b/qa/python_models/bls_simple/bls_simple.py @@ -0,0 +1,84 @@ +# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
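+
+# A minimal BLS test model: it forwards each request to the child model
+# named by the "MODEL_NAME" input tensor, passing `trace=request.trace()`
+# so that the child model's spans are linked to the parent request's trace.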
+ +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + @staticmethod + def auto_complete_config(auto_complete_model_config): + inputs = [ + {"name": "MODEL_NAME", "data_type": "TYPE_STRING", "dims": [1]}, + {"name": "INPUT0", "data_type": "TYPE_INT32", "dims": [1, 16]}, + {"name": "INPUT1", "data_type": "TYPE_INT32", "dims": [1, 16]}, + ] + outputs = [ + {"name": "OUTPUT0", "data_type": "TYPE_INT32", "dims": [16]}, + {"name": "OUTPUT1", "data_type": "TYPE_INT32", "dims": [16]}, + ] + + config = auto_complete_model_config.as_dict() + input_names = [] + output_names = [] + for input in config["input"]: + input_names.append(input["name"]) + for output in config["output"]: + output_names.append(output["name"]) + + for input in inputs: + if input["name"] not in input_names: + auto_complete_model_config.add_input(input) + for output in outputs: + if output["name"] not in output_names: + auto_complete_model_config.add_output(output) + + auto_complete_model_config.set_max_batch_size(0) + + return auto_complete_model_config + + def execute(self, requests): + responses = [] + for request in requests: + in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") + in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") + model_name = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME") + model_name_string = model_name.as_numpy()[0] + + infer_request = pb_utils.InferenceRequest( + model_name=model_name_string, + requested_output_names=["OUTPUT0", "OUTPUT1"], + inputs=[in_0, in_1], + trace=request.trace(), + ) + + infer_response = infer_request.exec() + + inference_response = pb_utils.InferenceResponse( + output_tensors=infer_response.output_tensors() + ) + responses.append(inference_response) + + return responses diff --git a/src/tracer.cc b/src/tracer.cc index c33aeb546e..d9562f0b82 100644 --- a/src/tracer.cc +++ b/src/tracer.cc @@ -36,10 +36,14 @@ #include #endif // TRITON_ENABLE_GPU #ifndef _WIN32 +#include "opentelemetry/exporters/ostream/span_exporter_factory.h" +#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" +#include "opentelemetry/sdk/resource/semantic_conventions.h" namespace otlp = opentelemetry::exporter::otlp; namespace otel_trace_sdk = opentelemetry::sdk::trace; namespace otel_trace_api = opentelemetry::trace; namespace otel_common = opentelemetry::common; +namespace otel_resource = opentelemetry::sdk::resource; #endif namespace triton { namespace server { @@ -80,6 +84,8 @@ TraceManager::TraceManager( false /*filepath_specified*/, false /*mode_specified*/, false /*config_map_specified*/)); trace_files_.emplace(filepath, file); + + InitTracer(config_map); } TRITONSERVER_Error* @@ -348,37 +354,82 @@ TraceManager::Trace::CaptureTimestamp( } } -#ifndef _WIN32 void -TraceManager::Trace::InitTracer( - const triton::server::TraceConfigMap& config_map) +TraceManager::InitTracer(const triton::server::TraceConfigMap& config_map) { - otlp::OtlpHttpExporterOptions opts; - auto mode_key = std::to_string(TRACE_MODE_OPENTELEMETRY); - auto otel_options_it = config_map.find(mode_key); - if (otel_options_it != config_map.end()) { - for (const auto& setting : otel_options_it->second) { - // FIXME add more configuration options of OTLP HTTP Exporter - if (setting.first == "url") { - opts.url = setting.second; + switch (global_setting_->mode_) { + case TRACE_MODE_OPENTELEMETRY: { +#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) + otlp::OtlpHttpExporterOptions opts; + otel_resource::ResourceAttributes attributes = {}; + 
attributes[otel_resource::SemanticConventions::kServiceName] = + "triton-inference-server"; + auto mode_key = std::to_string(TRACE_MODE_OPENTELEMETRY); + auto otel_options_it = config_map.find(mode_key); + if (otel_options_it != config_map.end()) { + for (const auto& setting : otel_options_it->second) { + // FIXME add more configuration options of OTLP HTTP Exporter + if (setting.first == "url") { + opts.url = setting.second; + } + if (setting.first == "resource") { + auto pos = setting.second.find('='); + auto key = setting.second.substr(0, pos); + auto value = setting.second.substr(pos + 1); + attributes[key] = value; + } + } + } + auto exporter = otlp::OtlpHttpExporterFactory::Create(opts); + auto test_exporter = triton::server::GetEnvironmentVariableOrDefault( + "TRITON_OPENTELEMETRY_TEST", "false"); + if (test_exporter != "false") { + exporter = opentelemetry::exporter::trace::OStreamSpanExporterFactory:: + Create(); } + auto processor = otel_trace_sdk::SimpleSpanProcessorFactory::Create( + std::move(exporter)); + auto resource = otel_resource::Resource::Create(attributes); + std::shared_ptr provider = + otel_trace_sdk::TracerProviderFactory::Create( + std::move(processor), resource); + + otel_trace_api::Provider::SetTracerProvider(provider); + break; +#else + LOG_ERROR << "Unsupported trace mode: " + << TraceManager::InferenceTraceModeString( + global_setting_->mode_); + break; +#endif + } + default: + return; + } +} + +void +TraceManager::CleanupTracer() +{ + switch (global_setting_->mode_) { + case TRACE_MODE_OPENTELEMETRY: { +#if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) + std::shared_ptr none; + otel_trace_api::Provider::SetTracerProvider(none); + break; +#else + LOG_ERROR << "Unsupported trace mode: " + << TraceManager::InferenceTraceModeString( + global_setting_->mode_); + break; +#endif } + default: + return; } - exporter_ = otlp::OtlpHttpExporterFactory::Create(opts); - processor_ = - otel_trace_sdk::SimpleSpanProcessorFactory::Create(std::move(exporter_)); - provider_ = - otel_trace_sdk::TracerProviderFactory::Create(std::move(processor_)); - auto steady_timestamp_ns = - std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()) - .count(); - auto root_span = StartSpan("InferRequest", steady_timestamp_ns); - // Initializing OTel context and storing "InferRequest" span as a root span - // to keep it alive for the duration of the request. - otel_context_ = opentelemetry::context::Context({kRootSpan, root_span}); } +#ifndef _WIN32 void TraceManager::Trace::StartSpan( std::string span_key, TRITONSERVER_InferenceTrace* trace, @@ -404,6 +455,10 @@ TraceManager::Trace::StartSpan( if (parent_id == 0 && activity == TRITONSERVER_TRACE_REQUEST_START) { parent_span_key = kRootSpan; } else if (activity == TRITONSERVER_TRACE_REQUEST_START) { + // [FIXME] For BLS requests parent span for children's request spans + // should be parent model's compute span. Currently, + // this won't work, since parent's compute span will be created + // only after children's spans are created. 
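+    // Until then, children's request spans are attached to the parent
+    // model's request span instead, as done below.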
parent_span_key = kRequestSpan + std::to_string(parent_id); } else if (activity == TRITONSERVER_TRACE_COMPUTE_START) { parent_span_key = kRequestSpan + std::to_string(trace_id); @@ -454,7 +509,8 @@ TraceManager::Trace::StartSpan( otel_context_.GetValue(parent_span_key)); options.parent = parent_span->GetContext(); } - return provider_->GetTracer(kTritonTracer)->StartSpan(display_name, options); + auto provider = opentelemetry::trace::Provider::GetTracerProvider(); + return provider->GetTracer(kTritonTracer)->StartSpan(display_name, options); } void @@ -575,14 +631,17 @@ TraceManager::Trace::AddEvent( void TraceManager::TraceRelease(TRITONSERVER_InferenceTrace* trace, void* userp) { - uint64_t parent_id; + uint64_t id; LOG_TRITONSERVER_ERROR( - TRITONSERVER_InferenceTraceParentId(trace, &parent_id), - "getting trace parent id"); + TRITONSERVER_InferenceTraceId(trace, &id), "getting trace id"); + + auto ts = reinterpret_cast*>(userp); + std::lock_guard lk((*ts)->mtx_); + (*ts)->spawned_traces_tracker_.erase(id); // The userp will be shared with the trace children, so only delete it - // if the root trace is being released - if (parent_id == 0) { - delete reinterpret_cast*>(userp); + // if no more TraceRelease calls are expected + if ((*ts)->spawned_traces_tracker_.empty()) { + delete ts; } LOG_TRITONSERVER_ERROR( TRITONSERVER_InferenceTraceDelete(trace), "deleting trace"); @@ -617,6 +676,10 @@ TraceManager::TraceActivity( reinterpret_cast*>(userp)->get(); std::lock_guard lk(ts->mtx_); + if (ts->spawned_traces_tracker_.find(id) == + ts->spawned_traces_tracker_.end()) { + ts->spawned_traces_tracker_.emplace(id); + } if (ts->setting_->mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 @@ -627,7 +690,6 @@ TraceManager::TraceActivity( #endif return; } - std::stringstream* ss = nullptr; { if (ts->streams_.find(id) == ts->streams_.end()) { @@ -733,6 +795,7 @@ TraceManager::TraceTensorActivity( std::unique_ptr stream(new std::stringstream()); ss = stream.get(); ts->streams_.emplace(id, std::move(stream)); + ts->spawned_traces_tracker_.emplace(id); } else { ss = ts->streams_[id].get(); // If the string stream is not newly created, add "," as there is @@ -988,7 +1051,15 @@ TraceManager::TraceSetting::SampleTrace() "getting trace id"); if (mode_ == TRACE_MODE_OPENTELEMETRY) { #ifndef _WIN32 - lts->InitTracer(config_map_); + auto steady_timestamp_ns = + std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) + .count(); + auto root_span = lts->StartSpan("InferRequest", steady_timestamp_ns); + // Initializing OTel context and storing "InferRequest" span as a root + // span to keep it alive for the duration of the request. 
+ lts->otel_context_ = + opentelemetry::context::Context({kRootSpan, root_span}); #else LOG_ERROR << "Unsupported trace mode: " << TraceManager::InferenceTraceModeString(mode_); diff --git a/src/tracer.h b/src/tracer.h index 55bf2b9800..ce214085d3 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -36,14 +36,13 @@ #include #if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) -#include "opentelemetry/exporters/otlp/otlp_http_exporter_factory.h" #include "opentelemetry/nostd/shared_ptr.h" +#include "opentelemetry/sdk/resource/resource.h" #include "opentelemetry/sdk/trace/processor.h" #include "opentelemetry/sdk/trace/simple_processor_factory.h" #include "opentelemetry/sdk/trace/tracer_provider_factory.h" #include "opentelemetry/trace/context.h" #include "opentelemetry/trace/provider.h" -namespace otlp = opentelemetry::exporter::otlp; namespace otel_trace_sdk = opentelemetry::sdk::trace; namespace otel_trace_api = opentelemetry::trace; #endif @@ -122,7 +121,7 @@ class TraceManager { const std::string& filepath, const InferenceTraceMode mode, const TraceConfigMap& config_map); - ~TraceManager() = default; + ~TraceManager() { CleanupTracer(); } // Return a trace that should be used to collected trace activities // for an inference request. Return nullptr if no tracing should occur. @@ -150,6 +149,19 @@ class TraceManager { static const char* InferenceTraceModeString(InferenceTraceMode mode); + /// In OpenTelemetry trace mode initializes Opentelemetry exporter, processor, + /// and sets the global trace provider. + /// In Triton trace mode is a no-op. + /// + /// \param config_map A config map, which stores all parameters, specified + /// by user. + void InitTracer(const TraceConfigMap& config_map); + + /// In OpenTelemetry trace mode cleans global tracer provider, + /// set by InitTracer. + /// In Triton trace mode is a no-op. + void CleanupTracer(); + struct Trace { Trace() : trace_(nullptr), trace_id_(0) {} ~Trace(); @@ -157,6 +169,10 @@ class TraceManager { // Group the spawned traces by trace ID for better formatting std::mutex mtx_; std::unordered_map> streams_; + // We use the set to track the number of spawned traces, so that + // when TraceManager::TraceRelease() with 'trace_userp_' is called + // we can safely release 'trace_userp_' + std::set spawned_traces_tracker_; // Triton trace object that this trace is assosicated with, // 'Trace' object does not take ownership of 'trace_'. The caller of // SampleTrace() must call TraceManager::TraceRelease() with 'trace_userp_' @@ -172,12 +188,6 @@ class TraceManager { void CaptureTimestamp(const std::string& name, uint64_t timestamp_ns); #if !defined(_WIN32) && defined(TRITON_ENABLE_TRACING) - /// Initializes Opentelemetry exporter, processor, provider and context. - /// - /// \param config_map A config map, which stores all parameters, specified - /// by user. - void InitTracer(const TraceConfigMap& config_map); - /// Reports TRITONSERVER_InferenceTraceActivity as event to /// the currently active span. If activity is an instance of /// `TRITONSERVER_TRACE_REQUEST_START` or @@ -198,6 +208,24 @@ class TraceManager { TRITONSERVER_InferenceTrace* trace, TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns); + /// Starts a span with the provided timestamp and name. + /// + /// \param display_name Span's name, which will be shown in the trace. 
+ /// \param raw_timestamp_ns Steady timestamp, which is used to calculate + /// OpenTelemetry SystemTimestamp to display span on a timeline, and + /// OpenTelemetry SteadyTimestamp to calculate the duration on the span + /// with better precision. + /// \param parent_span_key A span key, to find a parent span in the + /// OpenTelemetry context. If empty, a root span will be started, + /// i.e. with no parent span specified. + /// \return A shared pointer to a newly created OpenTelemetry span. + opentelemetry::nostd::shared_ptr StartSpan( + std::string display_name, const uint64_t& raw_timestamp_ns, + std::string parent_span_key = ""); + + // OTel context to store spans, created in the current trace + opentelemetry::context::Context otel_context_; + private: // OpenTelemetry SDK relies on system's clock for event timestamps. // Triton Tracing records timestamps using steady_clock. This is a @@ -215,15 +243,6 @@ class TraceManager { std::chrono::duration_cast( std::chrono::steady_clock::now().time_since_epoch()); - std::unique_ptr exporter_; - - std::unique_ptr processor_; - - std::shared_ptr provider_; - - // OTel context to store spans, created in the current trace - opentelemetry::context::Context otel_context_; - /// Starts a compute or request span based on `activity`. /// For request spans, it will add the following attributes to the span: /// `model_name`, `model_version`, `trace_id`, `parent_id`. @@ -243,21 +262,6 @@ class TraceManager { TRITONSERVER_InferenceTraceActivity activity, uint64_t timestamp_ns, uint64_t trace_id); - /// Starts a span with the provided timestamp and name. - /// - /// \param display_name Span's name, which will be shown in the trace. - /// \param raw_timestamp_ns Steady timestamp, which is used to calculate - /// OpenTelemetry SystemTimestamp to display span on a timeline, and - /// OpenTelemetry SteadyTimestamp to calculate the duration on the span - /// with better precision. - /// \param parent_span_key A span key, to find a parent span in the - /// OpenTelemetry context. If empty, a root span will be started, - /// i.e. with no parent span specified. - /// \return A shared pointer to a newly created OpenTelemetry span. - opentelemetry::nostd::shared_ptr StartSpan( - std::string display_name, const uint64_t& raw_timestamp_ns, - std::string parent_span_key = ""); - /// Ends the provided span. /// /// \param span_key Span's key to retrieve the corresponding span from the