Skip to content

Commit 05a4914

Browse files
committed
Fixing commit tree:
Refactor code, so that OTel tracer provider is initialized only once. Added resource cmd option, testing. Added docs.
1 parent b55df2e commit 05a4914

File tree

6 files changed

+247
-174
lines changed

6 files changed

+247
-174
lines changed

docs/user_guide/trace.md

+48-6
Original file line numberDiff line numberDiff line change
@@ -427,11 +427,41 @@ The meaning of the trace timestamps is:
427427

428428
* BACKEND_OUTPUT: The tensor in the response of a backend.
429429

430+
## Tracing for BLS models
431+
432+
Triton does not collect traces for child models invoked from
433+
[BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting) models.
434+
435+
To include child models in collected traces, the user needs to provide the `trace`
436+
argument (as shown in the example) when constructing an InferenceRequest object.
437+
438+
```python
439+
440+
import triton_python_backend_utils as pb_utils
441+
442+
443+
class TritonPythonModel:
444+
...
445+
def execute(self, requests):
446+
...
447+
for request in requests:
448+
...
449+
# Create an InferenceRequest object. `model_name`,
450+
# `requested_output_names`, and `inputs` are the required arguments and
451+
# must be provided when constructing an InferenceRequest object. Make
452+
# sure to replace `inputs` argument with a list of `pb_utils.Tensor`
453+
# objects.
454+
inference_request = pb_utils.InferenceRequest(
455+
model_name='model_name',
456+
requested_output_names=['REQUESTED_OUTPUT_1', 'REQUESTED_OUTPUT_2'],
457+
inputs=[<pb_utils.Tensor object>], trace = request.trace())
458+
459+
```
460+
430461
## OpenTelemetry trace support
431462

432-
Triton provides an option to generate and export traces
433-
for standalone and ensemble models
434-
using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
463+
Triton provides an option to generate and export traces using
464+
[OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
435465

436466
To specify OpenTelemetry mode for tracing, specify the `--trace-config`
437467
flag as follows:
@@ -477,16 +507,28 @@ The following table shows available OpenTelemetry trace APIs settings for
477507
trace data.
478508
</td>
479509
</tr>
510+
<tr>
511+
<td><code>resource</code></td>
512+
<td><code>Empty</code></td>
513+
<td>
514+
Key-value pairs to be used as resource attributes. <br/>
515+
Should be specified as follows:<br/>
516+
<code>--trace-config opentelemetry,resource=service.name=triton</code><br/>
517+
<code>--trace-config opentelemetry,resource=service.version=1</code><br/>
518+
Alternatively, key-value attributes can be specified through <br/>
519+
<a href="https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_resource_attributes">
520+
OTEL_RESOURCE_ATTRIBUTES</a>
521+
environment variable.
522+
</td>
523+
</tr>
480524
</tbody>
481525
</table>
482526

527+
483528
### Limitations
484529

485530
- OpenTelemetry trace mode is not supported on Windows systems.
486531

487-
- Tracing [BLS](https://github.com/triton-inference-server/python_backend/tree/main#business-logic-scripting)
488-
models is not supported.
489-
490532
- Triton supports only
491533
[OTLP/HTTP Exporter](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/otlp.md#otlphttp)
492534
and allows specification of only url for this exporter through

qa/L0_trace/opentelemetry_unittest.py

+88-70
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
import tritonclient.grpc as grpcclient
3737
import tritonclient.http as httpclient
3838

39-
EXPECTED_NUM_SPANS = 10
39+
EXPECTED_NUM_SPANS = 16
4040

4141

4242
class OpenTelemetryTest(tu.TestResultCollector):
@@ -50,68 +50,66 @@ def setUp(self):
5050
else:
5151
break
5252

53-
data = data.split('\n')
53+
data = data.split("\n")
5454
full_spans = [
55-
entry.split('POST')[0]
56-
for entry in data
57-
if "resource_spans" in entry
55+
entry.split("POST")[0] for entry in data if "resource_spans" in entry
5856
]
5957
self.spans = []
58+
self.resource_attributes = []
6059
for span in full_spans:
6160
span = json.loads(span)
62-
self.spans.append(
63-
span["resource_spans"][0]['scope_spans'][0]['spans'][0])
61+
self.spans.append(span["resource_spans"][0]["scope_spans"][0]["spans"][0])
62+
self.resource_attributes.append(
63+
span["resource_spans"][0]["resource"]["attributes"]
64+
)
6465

6566
self.simple_model_name = "simple"
6667
self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
68+
self.bls_model_name = "bls_simple"
6769
self.root_span = "InferRequest"
6870

6971
def _check_events(self, span_name, events):
70-
root_events_http =\
71-
["HTTP_RECV_START",
72-
"HTTP_RECV_END",
73-
"INFER_RESPONSE_COMPLETE",
74-
"HTTP_SEND_START",
75-
"HTTP_SEND_END"]
76-
root_events_grpc =\
77-
["GRPC_WAITREAD_START",
78-
"GRPC_WAITREAD_END",
79-
"INFER_RESPONSE_COMPLETE",
80-
"GRPC_SEND_START",
81-
"GRPC_SEND_END"]
82-
request_events =\
83-
["REQUEST_START",
84-
"QUEUE_START",
85-
"REQUEST_END"]
86-
compute_events =\
87-
["COMPUTE_START",
88-
"COMPUTE_INPUT_END",
89-
"COMPUTE_OUTPUT_START",
90-
"COMPUTE_END"]
72+
root_events_http = [
73+
"HTTP_RECV_START",
74+
"HTTP_RECV_END",
75+
"INFER_RESPONSE_COMPLETE",
76+
"HTTP_SEND_START",
77+
"HTTP_SEND_END",
78+
]
79+
root_events_grpc = [
80+
"GRPC_WAITREAD_START",
81+
"GRPC_WAITREAD_END",
82+
"INFER_RESPONSE_COMPLETE",
83+
"GRPC_SEND_START",
84+
"GRPC_SEND_END",
85+
]
86+
request_events = ["REQUEST_START", "QUEUE_START", "REQUEST_END"]
87+
compute_events = [
88+
"COMPUTE_START",
89+
"COMPUTE_INPUT_END",
90+
"COMPUTE_OUTPUT_START",
91+
"COMPUTE_END",
92+
]
9193

9294
if span_name == "compute":
9395
# Check that all compute related events (and only them)
9496
# are recorded in compute span
9597
self.assertTrue(all(entry in events for entry in compute_events))
9698
self.assertFalse(all(entry in events for entry in request_events))
9799
self.assertFalse(
98-
all(entry in events
99-
for entry in root_events_http + root_events_grpc))
100+
all(entry in events for entry in root_events_http + root_events_grpc)
101+
)
100102

101103
elif span_name == self.root_span:
102104
# Check that root span has INFER_RESPONSE_COMPLETE, _RECV/_WAITREAD
103105
# and _SEND events (and only them)
104106
if "HTTP" in events:
105-
self.assertTrue(
106-
all(entry in events for entry in root_events_http))
107-
self.assertFalse(
108-
all(entry in events for entry in root_events_grpc))
107+
self.assertTrue(all(entry in events for entry in root_events_http))
108+
self.assertFalse(all(entry in events for entry in root_events_grpc))
109109

110110
elif "GRPC" in events:
111-
self.assertTrue(
112-
all(entry in events for entry in root_events_grpc))
113-
self.assertFalse(
114-
all(entry in events for entry in root_events_http))
111+
self.assertTrue(all(entry in events for entry in root_events_grpc))
112+
self.assertFalse(all(entry in events for entry in root_events_http))
115113
self.assertFalse(all(entry in events for entry in request_events))
116114
self.assertFalse(all(entry in events for entry in compute_events))
117115

@@ -120,17 +118,20 @@ def _check_events(self, span_name, events):
120118
# are recorded in request span
121119
self.assertTrue(all(entry in events for entry in request_events))
122120
self.assertFalse(
123-
all(entry in events
124-
for entry in root_events_http + root_events_grpc))
121+
all(entry in events for entry in root_events_http + root_events_grpc)
122+
)
125123
self.assertFalse(all(entry in events for entry in compute_events))
126124

127125
def _check_parent(self, child_span, parent_span):
128126
# Check that child and parent span have the same trace_id
129127
# and child's `parent_span_id` is the same as parent's `span_id`
130-
self.assertEqual(child_span['trace_id'], parent_span['trace_id'])
131-
self.assertIn('parent_span_id', child_span,
132-
"child span does not have parent span id specified")
133-
self.assertEqual(child_span['parent_span_id'], parent_span['span_id'])
128+
self.assertEqual(child_span["trace_id"], parent_span["trace_id"])
129+
self.assertIn(
130+
"parent_span_id",
131+
child_span,
132+
"child span does not have parent span id specified",
133+
)
134+
self.assertEqual(child_span["parent_span_id"], parent_span["span_id"])
134135

135136
def test_spans(self):
136137
parsed_spans = []
@@ -141,16 +142,18 @@ def test_spans(self):
141142
self._check_events(span_name, json.dumps(span["events"]))
142143
parsed_spans.append(span_name)
143144

144-
# There should be 6 spans in total:
145-
# 3 for http request, 3 for grpc request, 4 for ensemble
146-
self.assertEqual(len(self.spans), 10)
147-
# We should have 3 compute spans
148-
self.assertEqual(parsed_spans.count("compute"), 3)
149-
# 4 request spans (3 named simple - same as our model name, 1 ensemble)
150-
self.assertEqual(parsed_spans.count(self.simple_model_name), 3)
151-
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1)
152-
# 3 root spans
153-
self.assertEqual(parsed_spans.count(self.root_span), 3)
145+
# There should be 16 spans in total:
146+
# 3 for http request, 3 for grpc request, 4 for ensemble, 6 for bls
147+
self.assertEqual(len(self.spans), EXPECTED_NUM_SPANS)
148+
# We should have 5 compute spans
149+
self.assertEqual(parsed_spans.count("compute"), 5)
150+
# 7 request spans
151+
# (4 named simple - same as our model name, 2 ensemble, 1 bls)
152+
self.assertEqual(parsed_spans.count(self.simple_model_name), 4)
153+
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 2)
154+
self.assertEqual(parsed_spans.count(self.bls_model_name), 1)
155+
# 4 root spans
156+
self.assertEqual(parsed_spans.count(self.root_span), 4)
154157

155158
def test_nested_spans(self):
156159
# First 3 spans in `self.spans` belong to HTTP request
@@ -162,27 +165,41 @@ def test_nested_spans(self):
162165
self._check_parent(child, parent)
163166

164167
# root_span should not have `parent_span_id` field
165-
self.assertNotIn('parent_span_id', self.spans[2],
166-
"root span has a parent_span_id specified")
168+
self.assertNotIn(
169+
"parent_span_id", self.spans[2], "root span has a parent_span_id specified"
170+
)
167171

168172
# Next 3 spans in `self.spans` belong to GRPC request
169173
# Order of spans and their relationship described earlier
170174
for child, parent in zip(self.spans[3:6], self.spans[4:6]):
171175
self._check_parent(child, parent)
172176

173177
# root_span should not have `parent_span_id` field
174-
self.assertNotIn('parent_span_id', self.spans[5],
175-
"root span has a parent_span_id specified")
178+
self.assertNotIn(
179+
"parent_span_id", self.spans[5], "root span has a parent_span_id specified"
180+
)
176181

177182
# Final 4 spans in `self.spans` belong to ensemble request
178183
# Order of spans: compute span - request span - request span - root span
179184
for child, parent in zip(self.spans[6:10], self.spans[7:10]):
180185
self._check_parent(child, parent)
181186

182187
# root_span should not have `parent_span_id` field
183-
self.assertNotIn('parent_span_id', self.spans[9],
184-
"root span has a parent_span_id specified")
185-
188+
self.assertNotIn(
189+
"parent_span_id", self.spans[9], "root span has a parent_span_id specified"
190+
)
191+
192+
def test_resource_attributes(self):
193+
test_attribute_entry = "{{'key': {k}, 'value': {{'string_value': {v}}}}}"
194+
for attribute in self.resource_attributes:
195+
self.assertIn(
196+
test_attribute_entry.format(k="'test.key'", v="'test.value'"),
197+
str(attribute),
198+
)
199+
self.assertIn(
200+
test_attribute_entry.format(k="'service.name'", v="'test_triton'"),
201+
str(attribute),
202+
)
186203

187204

188205
def prepare_data(client):
@@ -201,11 +218,12 @@ def prepare_data(client):
201218

202219

203220
def prepare_traces():
204-
205-
triton_client_http = httpclient.InferenceServerClient("localhost:8000",
206-
verbose=True)
207-
triton_client_grpc = grpcclient.InferenceServerClient("localhost:8001",
208-
verbose=True)
221+
triton_client_http = httpclient.InferenceServerClient(
222+
"localhost:8000", verbose=True
223+
)
224+
triton_client_grpc = grpcclient.InferenceServerClient(
225+
"localhost:8001", verbose=True
226+
)
209227
inputs = prepare_data(httpclient)
210228
triton_client_http.infer("simple", inputs)
211229

@@ -215,16 +233,16 @@ def prepare_traces():
215233
inputs = prepare_data(httpclient)
216234
triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs)
217235

236+
send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")
218237

219-
def send_bls_request():
220238

239+
def send_bls_request(model_name="simple"):
221240
with httpclient.InferenceServerClient("localhost:8000") as client:
222-
223241
inputs = prepare_data(httpclient)
224242
inputs.append(httpclient.InferInput("MODEL_NAME", [1], "BYTES"))
225-
inputs[2].set_data_from_numpy(np.array(["simple"], dtype=np.object_))
243+
inputs[2].set_data_from_numpy(np.array([model_name], dtype=np.object_))
226244
client.infer("bls_simple", inputs)
227245

228246

229-
if __name__ == '__main__':
247+
if __name__ == "__main__":
230248
unittest.main()

qa/L0_trace/test.sh

+6-6
Original file line numberDiff line numberDiff line change
@@ -659,7 +659,7 @@ set +e
659659
# Send bls requests to make sure simple model is traced
660660
for p in {1..4}; do
661661
python -c 'import opentelemetry_unittest; \
662-
opentelemetry_unittest.send_bls_request()' >> client_update.log 2>&1
662+
opentelemetry_unittest.send_bls_request(model_name="ensemble_add_sub_int32_int32_int32")' >> client_update.log 2>&1
663663
done
664664

665665
set -e
@@ -695,18 +695,18 @@ fi
695695

696696
OPENTELEMETRY_TEST=opentelemetry_unittest.py
697697
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
698-
EXPECTED_NUM_TESTS="2"
698+
EXPECTED_NUM_TESTS="3"
699699

700700
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
701701
--trace-config=count=100 --trace-config=mode=opentelemetry \
702702
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
703+
--trace-config=opentelemetry,resource=test.key=test.value \
704+
--trace-config=opentelemetry,resource=service.name=test_triton \
703705
--model-repository=$MODELSDIR"
704706
SERVER_LOG="./inference_server_trace_config.log"
705707

706-
# Increasing OTLP timeout, since we don't use a valid OTLP collector
707-
# and don't send a proper signal back.
708-
export OTEL_EXPORTER_OTLP_TIMEOUT=50000
709-
export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=50000
708+
export OTEL_EXPORTER_OTLP_TIMEOUT=5
709+
export OTEL_EXPORTER_OTLP_TRACES_TIMEOUT=5
710710

711711
run_server
712712
if [ "$SERVER_PID" == "0" ]; then

0 commit comments

Comments
 (0)