
Commit 693bd2b

Improve testing
1 parent 852da22 commit 693bd2b

4 files changed: +183 -22 lines changed


qa/L0_implicit_state/implicit_state.py

+21 -3

@@ -115,29 +115,47 @@ def test_implicit_state_growable_memory(self):
         inputs[0].set_data_from_numpy(np.random.randint(5, size=[1], dtype=np.int32))
         inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=True,
             sequence_end=False,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.zeros(output_state.shape, dtype=np.int8)
+        np.testing.assert_equal(output_state, expected_output_state)

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=False,
             sequence_end=False,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [expected_output_state, np.ones(expected_output_state.shape, dtype=np.int8)]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)

-        triton_client.infer(
+        output = triton_client.infer(
             model_name="growable_memory",
             inputs=inputs,
             sequence_id=2,
             sequence_start=False,
             sequence_end=True,
         )
+        output_state = output.as_numpy("OUTPUT_STATE")
+        expected_output_state = np.concatenate(
+            [
+                expected_output_state,
+                np.full(
+                    (expected_output_state.shape[0] // 2,), dtype=np.int8, fill_value=2
+                ),
+            ]
+        )
+        np.testing.assert_equal(output_state, expected_output_state)

     def test_no_update(self):
         # Test implicit state without updating any state
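The three checks above follow a single pattern: each step of the sequence appends one 1 MiB INT8 block to the accumulated state, filled with the zero-based request index (zeros, then ones, then twos). A generalized sketch of that check, assuming `results` holds the three InferResult objects in order (a hypothetical helper, not part of this commit):

import numpy as np


def check_growable_state(results):
    # Each OUTPUT_STATE should equal the previous state with one more
    # 1 MiB block appended; block i is filled with the value i, matching
    # what the backend writes for test case 3.
    expected = np.empty((0,), dtype=np.int8)
    for step, result in enumerate(results):
        block = np.full((1024 * 1024,), fill_value=step, dtype=np.int8)
        expected = np.concatenate([expected, block])
        np.testing.assert_equal(result.as_numpy("OUTPUT_STATE"), expected)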
config.pbtxt (new file for the growable_memory model)

+102 -0
@@ -0,0 +1,102 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions # are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "growable_memory"
+backend: "implicit_state"
+max_batch_size: 0
+sequence_batching {
+  control_input [
+    {
+      name: "START"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_START
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "READY"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_READY
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    },
+    {
+      name: "END"
+      control [
+        {
+          kind: CONTROL_SEQUENCE_END
+          fp32_false_true: [ 0, 1 ]
+        }
+      ]
+    }
+  ]
+  state [
+    {
+      input_name: "INPUT_STATE"
+      output_name: "OUTPUT_STATE"
+      data_type: TYPE_INT8
+      dims: [ 1024, 1024 ]
+      use_single_buffer: true
+      use_growable_memory: true
+    }
+  ]
+}
+
+input [
+  {
+    name: "INPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "TEST_CASE"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT"
+    data_type: TYPE_INT32
+    dims: [ 1 ]
+  },
+  {
+    name: "OUTPUT_STATE"
+    data_type: TYPE_INT8
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind : KIND_GPU
+  }
+]
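With this configuration, a minimal client call looks like the sketch below (assumes a running Triton server at localhost:8001 with the model loaded and the tritonclient package installed; the input values are illustrative). Setting TEST_CASE to 3 selects the growable-memory path exercised by the test above:

import numpy as np
import tritonclient.grpc as grpcclient

client = grpcclient.InferenceServerClient("localhost:8001")

# The two inputs declared above: INPUT and TEST_CASE, both INT32 with dims [ 1 ].
inputs = [
    grpcclient.InferInput("INPUT", [1], "INT32"),
    grpcclient.InferInput("TEST_CASE", [1], "INT32"),
]
inputs[0].set_data_from_numpy(np.asarray([0], dtype=np.int32))
inputs[1].set_data_from_numpy(np.asarray([3], dtype=np.int32))  # test case 3

# First request of a sequence: maps to the START and READY controls above.
result = client.infer(
    model_name="growable_memory",
    inputs=inputs,
    sequence_id=2,
    sequence_start=True,
    sequence_end=False,
)
print(result.as_numpy("OUTPUT_STATE").shape)  # (1048576,) after the first step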

qa/L0_sequence_batcher/test.sh

+16 -15

@@ -337,21 +337,22 @@ for MODEL in $MODELS; do
             sed -i "s/$(basename $MODEL)/$(basename $MODEL)_full/" config.pbtxt && \
             sed -i "s/minimum_slot_utilization: 0/minimum_slot_utilization: 1/" config.pbtxt)

-        if [ "$USE_SINGLE_BUFFER" == "1" && "$IMPLICIT_STATE" == "1" ]; then
-            SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_single_buffer: true"
-            (cd models0/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models1/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models2/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd models4/$(basename $MODEL) && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd queue_delay_models/$(basename $MODEL)_full && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-            (cd queue_delay_models/$(basename $MODEL)_half && \
-                sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
-        fi
+        # TODO: Enable single state buffer testing for sequence batcher
+        # if [ "$USE_SINGLE_BUFFER" == "1" && "$IMPLICIT_STATE" == "1" ]; then
+        #     SED_REPLACE_PATTERN="N;N;N;N;N;/state.*dims:.*/a use_single_buffer: true"
+        #     (cd models0/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models1/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models2/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd models4/$(basename $MODEL) && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd queue_delay_models/$(basename $MODEL)_full && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        #     (cd queue_delay_models/$(basename $MODEL)_half && \
+        #         sed -i "$SED_REPLACE_PATTERN" config.pbtxt)
+        # fi
     else
         cp -r $MODEL queue_delay_models/$(basename $MODEL)_full && \
             (cd queue_delay_models/$(basename $MODEL)_full && \
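For reference, the disabled SED_REPLACE_PATTERN reads the config in multi-line windows (the N;N;N;N;N prefix) so that a state block and its dims: line can be matched together, then appends use_single_buffer: true after the match. One caveat for whoever re-enables it: && inside a single-bracket [ ] test is not valid Bash, so the guard would need [[ ... && ... ]] or two chained tests. A rough Python equivalent of the config edit (a hypothetical helper, not part of the test suite):

def enable_single_buffer(config_text: str) -> str:
    # Append "use_single_buffer: true" after the "dims:" line of a state
    # block, mimicking the in-place sed edit that is commented out above.
    out, in_state = [], False
    for line in config_text.splitlines():
        out.append(line)
        if line.lstrip().startswith("state"):
            in_state = True
        if in_state and "dims:" in line:
            out.append("use_single_buffer: true")
            in_state = False
    return "\n".join(out) + "\n"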

src/test/implicit_state/src/implicit_state.cc

+44 -4

@@ -1060,12 +1060,17 @@ TRITONBACKEND_ModelInstanceExecute(
         } break;
         case 3: {
           TRITONBACKEND_State* response_state;
-          std::vector<int64_t> shape{1};
+          size_t block_size = sizeof(int8_t) * 1024 * 1024;
+          int64_t current_elements =
+              (instance_state->request_index_ + 1) * 1024 * 1024;
+          std::cout << "current elements are "
+                    << (instance_state->request_index_ + 1) << std::endl;
+          std::vector<int64_t> shape{current_elements};
           GUARDED_RESPOND_IF_ERROR(
               responses, r, request,
               TRITONBACKEND_StateNew(
                   &response_state, request, "OUTPUT_STATE",
-                  TRITONSERVER_TYPE_INT32, shape.data() /* data */,
+                  TRITONSERVER_TYPE_INT8, shape.data() /* data */,
                   shape.size() /* dim_count */));

           if (responses[r] == nullptr) {
@@ -1081,8 +1086,6 @@ TRITONBACKEND_ModelInstanceExecute(
           int64_t actual_memory_type_id = 0;
           char* buffer;

-          size_t block_size = sizeof(int32_t) * 1024 * 1024;
-
           // Request an output buffer in GPU. This is only for testing purposes
           // to make sure that GPU output buffers can be requested.
           GUARDED_RESPOND_IF_ERROR(
@@ -1098,6 +1101,43 @@ TRITONBACKEND_ModelInstanceExecute(
               buffer + block_size * (instance_state->request_index_),
               instance_state->request_index_, block_size);

+          TRITONBACKEND_Output* response_output;
+          GUARDED_RESPOND_IF_ERROR(
+              responses, r, request,
+              TRITONBACKEND_ResponseOutput(
+                  responses[r], &response_output, "OUTPUT_STATE",
+                  TRITONSERVER_TYPE_INT8, shape.data() /* data */,
+                  shape.size() /* dim_count */));
+
+          actual_memory_type = TRITONSERVER_MEMORY_CPU;
+          actual_memory_type_id = 0;
+          char* output_buffer;
+          GUARDED_RESPOND_IF_ERROR(
+              responses, r, request,
+              TRITONBACKEND_OutputBuffer(
+                  response_output, reinterpret_cast<void**>(&output_buffer),
+                  block_size * (instance_state->request_index_ + 1),
+                  &actual_memory_type, &actual_memory_type_id));
+          if ((responses[r] == nullptr) ||
+              (actual_memory_type != TRITONSERVER_MEMORY_CPU)) {
+            GUARDED_RESPOND_IF_ERROR(
+                responses, r, request,
+                TRITONSERVER_ErrorNew(
+                    TRITONSERVER_ERROR_UNSUPPORTED,
+                    "the backend can only handle CPU tensors"));
+            LOG_MESSAGE(
+                TRITONSERVER_LOG_ERROR,
+                (std::string("request ") + std::to_string(r) +
+                 ": the backend can only handle CPU tensors, error "
+                 "response sent")
+                    .c_str());
+            continue;
+          }
+          cudaMemcpy(
+              output_buffer, buffer,
+              block_size * (instance_state->request_index_ + 1),
+              cudaMemcpyDeviceToHost);
+
           instance_state->state_ = buffer;
         } break;
       }
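Taken together, these backend changes make test case 3 report the accumulated state back to the client: the state lives in a growable GPU buffer, each request fills one more 1 MiB block with its zero-based request index, and the whole buffer is copied into a CPU OUTPUT_STATE response output with cudaMemcpy so the assertions in implicit_state.py can inspect it. (The cudaMemcpy return code is not checked; a failed copy would surface only through the numpy assertions.) The final layout after the third request, reconstructed as a sketch:

import numpy as np

MB = 1024 * 1024  # INT8 elements per block, matching block_size in the backend

# Three concatenated blocks: block 0 holds 0s, block 1 holds 1s, block 2 holds 2s.
final_state = np.concatenate(
    [np.full((MB,), fill_value=i, dtype=np.int8) for i in range(3)]
)
assert final_state.shape == (3 * MB,)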

0 commit comments