Skip to content

Commit 71eeb29

Browse files
committed
Revised PR, added ensemble test
1 parent 1cbc24a commit 71eeb29

File tree

6 files changed

+212
-210
lines changed

6 files changed

+212
-210
lines changed

docs/user_guide/trace.md

+1
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,7 @@ The meaning of the trace timestamps is:
430430
## OpenTelemetry trace support
431431

432432
Triton provides an option to generate and export traces
433+
for standalone and ensemble models
433434
using [OpenTelemetry APIs and SDKs](https://opentelemetry.io/).
434435

435436
To specify OpenTelemetry mode for tracing, specify the `--trace-config`

qa/L0_trace/opentelemetry_unittest.py

+71-18
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,38 @@
2727
import sys
2828

2929
sys.path.append("../common")
30-
3130
import json
3231
import unittest
33-
32+
import tritonclient.http as httpclient
33+
import tritonclient.grpc as grpcclient
34+
import numpy as np
3435
import test_util as tu
36+
import time
3537

38+
EXPECTED_NUM_SPANS = 10
3639

3740
class OpenTelemetryTest(tu.TestResultCollector):
3841

3942
def setUp(self):
40-
with open('trace_collector.log', 'rt') as f:
41-
data = f.read()
42-
43+
while True:
44+
with open('trace_collector.log', 'rt') as f:
45+
data = f.read()
46+
if data.count("resource_spans") != EXPECTED_NUM_SPANS:
47+
time.sleep(5)
48+
continue
49+
else:
50+
break
51+
4352
data = data.split('\n')
44-
full_spans = [entry for entry in data if "resource_spans" in entry]
53+
full_spans = [entry.split('POST')[0] for entry in data if "resource_spans" in entry]
4554
self.spans = []
4655
for span in full_spans:
4756
span = json.loads(span)
4857
self.spans.append(
4958
span["resource_spans"][0]['scope_spans'][0]['spans'][0])
5059

51-
self.model_name = "simple"
60+
self.simple_model_name = "simple"
61+
self.ensemble_model_name = "ensemble_add_sub_int32_int32_int32"
5262
self.root_span = "InferRequest"
5363

5464
def _check_events(self, span_name, events):
@@ -102,7 +112,7 @@ def _check_events(self, span_name, events):
102112
self.assertFalse(
103113
all(entry in events for entry in compute_events))
104114

105-
elif span_name == self.model_name:
115+
elif span_name == self.simple_model_name:
106116
# Check that all request related events (and only them)
107117
# are recorded in request span
108118
self.assertTrue(all(entry in events for entry in request_events))
@@ -131,14 +141,15 @@ def test_spans(self):
131141
parsed_spans.append(span_name)
132142

133143
# There should be 10 spans in total:
134-
# 3 for http request and 3 for grpc request.
135-
self.assertEqual(len(self.spans), 6)
136-
# We should have 2 compute spans
137-
self.assertEqual(parsed_spans.count("compute"), 2)
138-
# 2 request spans (named simple - same as our model name)
139-
self.assertEqual(parsed_spans.count(self.model_name), 2)
140-
# 2 root spans
141-
self.assertEqual(parsed_spans.count(self.root_span), 2)
144+
# 3 for http request, 3 for grpc request, 4 for ensemble
145+
self.assertEqual(len(self.spans), 10)
146+
# We should have 3 compute spans
147+
self.assertEqual(parsed_spans.count("compute"), 3)
148+
# 4 request spans (3 named simple - same as our model name, 1 ensemble)
149+
self.assertEqual(parsed_spans.count(self.simple_model_name), 3)
150+
self.assertEqual(parsed_spans.count(self.ensemble_model_name), 1)
151+
# 3 root spans
152+
self.assertEqual(parsed_spans.count(self.root_span), 3)
142153

143154
def test_nested_spans(self):
144155

@@ -156,9 +167,9 @@ def test_nested_spans(self):
156167
self.spans[2],
157168
"root span has a parent_span_id specified")
158169

159-
# Last 3 spans in `self.spans` belong to GRPC request
170+
# Next 3 spans in `self.spans` belong to GRPC request
160171
# Order of spans and their relationship described earlier
161-
for child, parent in zip(self.spans[3:], self.spans[4:]):
172+
for child, parent in zip(self.spans[3:6], self.spans[4:6]):
162173
self._check_parent(child, parent)
163174

164175
# root_span should not have `parent_span_id` field
@@ -167,6 +178,48 @@ def test_nested_spans(self):
167178
self.spans[5],
168179
"root span has a parent_span_id specified")
169180

181+
# Final 4 spans in `self.spans` belong to ensemble request
182+
# Order of spans: compute span - request span - request span - root span
183+
for child, parent in zip(self.spans[6:10], self.spans[7:10]):
184+
self._check_parent(child, parent)
185+
186+
# root_span should not have `parent_span_id` field
187+
self.assertNotIn(
188+
'parent_span_id',
189+
self.spans[9],
190+
"root span has a parent_span_id specified")
191+
192+
def prepare_data(client):
193+
194+
inputs = []
195+
outputs = []
196+
input0_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)
197+
input1_data = np.full(shape=(1, 16), fill_value=-1, dtype=np.int32)
198+
199+
inputs.append(client.InferInput('INPUT0', [1, 16], "INT32"))
200+
inputs.append(client.InferInput('INPUT1', [1, 16], "INT32"))
201+
202+
# Initialize the data
203+
inputs[0].set_data_from_numpy(input0_data)
204+
inputs[1].set_data_from_numpy(input1_data)
205+
206+
return inputs
207+
208+
def prepare_traces():
209+
210+
triton_client_http = httpclient.InferenceServerClient("localhost:8000",
211+
verbose=True)
212+
triton_client_grpc = grpcclient.InferenceServerClient("localhost:8001",
213+
verbose=True)
214+
inputs = prepare_data(httpclient)
215+
triton_client_http.infer("simple",inputs)
216+
217+
inputs = prepare_data(grpcclient)
218+
triton_client_grpc.infer("simple", inputs)
219+
220+
inputs = prepare_data(httpclient)
221+
triton_client_http.infer("ensemble_add_sub_int32_int32_int32", inputs)
222+
170223

171224
if __name__ == '__main__':
172225
unittest.main()

qa/L0_trace/test.sh

+23-35
Original file line numberDiff line numberDiff line change
@@ -66,14 +66,19 @@ rm -f *.log
6666
rm -fr $MODELSDIR && mkdir -p $MODELSDIR
6767

6868
# Set up the simple and global_simple models using MODELBASE, and the ensemble model from ENSEMBLEDIR
69-
rm -fr $MODELSDIR && mkdir -p $MODELSDIR && \
70-
cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
69+
cp -r $DATADIR/$MODELBASE $MODELSDIR/simple && \
7170
rm -r $MODELSDIR/simple/2 && rm -r $MODELSDIR/simple/3 && \
7271
(cd $MODELSDIR/simple && \
7372
sed -i "s/^name:.*/name: \"simple\"/" config.pbtxt) && \
7473
cp -r $MODELSDIR/simple $MODELSDIR/global_simple && \
7574
(cd $MODELSDIR/global_simple && \
7675
sed -i "s/^name:.*/name: \"global_simple\"/" config.pbtxt) && \
76+
cp -r $ENSEMBLEDIR/simple_onnx_int32_int32_int32 $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
77+
rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/2 && \
78+
rm -r $MODELSDIR/ensemble_add_sub_int32_int32_int32/3 && \
79+
(cd $MODELSDIR/ensemble_add_sub_int32_int32_int32 && \
80+
sed -i "s/^name:.*/name: \"ensemble_add_sub_int32_int32_int32\"/" config.pbtxt && \
81+
sed -i "s/model_name:.*/model_name: \"simple\"/" config.pbtxt)
7782

7883
RET=0
7984

@@ -661,11 +666,12 @@ wait $SERVER_PID
661666

662667
OPENTELEMETRY_TEST=opentelemetry_unittest.py
663668
OPENTELEMETRY_LOG="opentelemetry_unittest.log"
664-
TEST_RESULT_FILE="opentelemetry_results.txt"
665669
EXPECTED_NUM_TESTS="2"
666670

667-
SERVER_ARGS="--trace-config=triton,file=some_file.log --trace-config=level=TIMESTAMPS \
668-
--trace-config=rate=1 --trace-config=count=6 --trace-config=mode=opentelemetry --trace-config=opentelemetry,url=localhost:$OTLP_PORT --model-repository=$MODELSDIR"
671+
SERVER_ARGS="--trace-config=level=TIMESTAMPS --trace-config=rate=1 \
672+
--trace-config=count=100 --trace-config=mode=opentelemetry \
673+
--trace-config=opentelemetry,url=localhost:$OTLP_PORT \
674+
--model-repository=$MODELSDIR"
669675
SERVER_LOG="./inference_server_trace_config.log"
670676
run_server
671677
if [ "$SERVER_PID" == "0" ]; then
@@ -674,37 +680,18 @@ if [ "$SERVER_PID" == "0" ]; then
674680
exit 1
675681
fi
676682

677-
# This is a simple python code that opens port
678-
python $TRACE_COLLECTOR $OTLP_PORT $TRACE_COLLECTOR_LOG &
679-
COLLECTOR_PID=$!
680-
681-
set +e
682-
683-
# To make sure receiver is ready
684-
sleep 3
685-
686-
# Send http request and collect trace
687-
$SIMPLE_HTTP_CLIENT >> client_update.log 2>&1
688-
if [ $? -ne 0 ]; then
689-
cat client_update.log
690-
RET=1
691-
fi
692-
693-
# Send grpc request and collect trace
694-
$SIMPLE_GRPC_CLIENT >> client_update.log 2>&1
695-
if [ $? -ne 0 ]; then
696-
cat client_update.log
697-
RET=1
698-
fi
699-
# To make sure log gets all data
700-
sleep 3
701-
702-
kill $COLLECTOR_PID
703-
wait $COLLECTOR_PID
683+
# Using netcat as trace collector
684+
apt-get update && apt-get install -y netcat
685+
nc -l -k 127.0.0.1 $OTLP_PORT >> $TRACE_COLLECTOR_LOG 2>&1 & COLLECTOR_PID=$!
704686

687+
# Preparing traces for unittest.
688+
# Note: this needs to run as a separate step to speed up trace collection.
689+
# Otherwise internal (opentelemetry_unittest.OpenTelemetryTest.setUp) check
690+
# will slow down collection.
691+
python -c 'import opentelemetry_unittest; opentelemetry_unittest.prepare_traces()' >>$CLIENT_LOG 2>&1
705692

706693
set +e
707-
694+
# Unittest will not start until the expected number of spans is collected.
708695
python $OPENTELEMETRY_TEST >>$OPENTELEMETRY_LOG 2>&1
709696
if [ $? -ne 0 ]; then
710697
cat $OPENTELEMETRY_LOG
@@ -718,11 +705,12 @@ else
718705
fi
719706
fi
720707

708+
kill $COLLECTOR_PID
709+
wait $COLLECTOR_PID
710+
721711
set -e
722712

723713
kill $SERVER_PID
724714
wait $SERVER_PID
725715

726-
set +e
727-
728716
exit $RET

qa/L0_trace/trace_collector.py

-66
This file was deleted.

0 commit comments

Comments
 (0)