
Commit 7982a53

Add error during rescheduling test to L0_generative_sequence (#6550)
1 parent a59f3cf commit 7982a53

3 files changed: +22 -3 lines changed


qa/L0_generative_sequence/generative_sequence_e2e.py

+19 -2
@@ -127,6 +127,25 @@ def test_grpc_stream(self, sequence_id=0, sequence_start=False):
             self.assertEqual(res_count, data_item.as_numpy("OUTPUT")[0][0])
         self.assertEqual(0, res_count)

+    def test_reschedule_error(self):
+        # Use a short idle timeout (< the backend reschedule delay of 0.5 s)
+        # so that the backend cannot reschedule the request: the scheduler
+        # will have terminated the sequence early.
+        config = r'"sequence_batching" : { "generative_sequence" : true, "max_sequence_idle_microseconds" : 200000 }'
+        with grpcclient.InferenceServerClient("localhost:8001") as triton_client:
+            triton_client.load_model(
+                "generative_sequence", config=MODEL_CONFIG_BASE.format(config)
+            )
+            with self.assertRaises(InferenceServerException) as context:
+                # The sequence idles out before the reschedule, so the
+                # rescheduled request is rejected as missing the START flag
+                self.test_grpc_stream()
+            print(str(context.exception))
+            self.assertTrue(
+                "must specify the START flag on the first request of the sequence"
+                in str(context.exception)
+            )
+
     def test_unsupported_sequence_scheduler(self):
         # Override model config with scheduler settings that do not support
         # request rescheduling.
@@ -145,7 +164,6 @@ def test_unsupported_sequence_scheduler(self):
                     # batcher expects sequence parameters to be provided explicitly
                     self.test_grpc_stream(sequence_id=sid, sequence_start=True)
                 sid += 1
-                print(str(context.exception))
                 self.assertTrue(
                     "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE"
                     in str(context.exception)
@@ -164,7 +182,6 @@ def test_unsupported_dynamic_scheduler(self):
             )
             with self.assertRaises(InferenceServerException) as context:
                 self.test_grpc_stream()
-            print(str(context.exception))
             self.assertTrue(
                 "Request is released with TRITONSERVER_REQUEST_RELEASE_RESCHEDULE"
                 in str(context.exception)
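
For orientation, here is a sketch of how the config override presumably fits together. MODEL_CONFIG_BASE is defined elsewhere in generative_sequence_e2e.py and does not appear in this diff, so the template below is an assumed shape; only the config string is taken from the new test.

    # Hypothetical stand-in for MODEL_CONFIG_BASE, which this diff does not
    # show: assume a JSON model-config template with a '{}' placeholder for
    # the scheduler settings.
    MODEL_CONFIG_BASE = r"""{{
        "backend": "generative_sequence",
        "max_batch_size": 1,
        {}
    }}"""

    # From the new test: a sequence batcher whose idle timeout (200,000 us,
    # i.e. 200 ms) is shorter than the backend's 0.5 s reschedule delay, so
    # the scheduler terminates the sequence before the backend reschedules.
    config = r'"sequence_batching" : { "generative_sequence" : true, "max_sequence_idle_microseconds" : 200000 }'
    print(MODEL_CONFIG_BASE.format(config))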

qa/L0_generative_sequence/test.sh

+1 -1
@@ -44,7 +44,7 @@ RET=0

 CLIENT_LOG="./generative_sequence_client.log"
 TEST_PY=./generative_sequence_e2e.py
-EXPECTED_NUM_TESTS="4"
+EXPECTED_NUM_TESTS="5"
 TEST_RESULT_FILE='test_results.txt'


src/test/generative_sequence/src/generative_sequence.cc

+2 -0
@@ -533,6 +533,8 @@ TRITONBACKEND_ModelInstanceExecute(
     SET_TIMESTAMP(exec_end_ns);
     max_exec_end_ns = std::max(max_exec_end_ns, exec_end_ns);

+    // Wait for 0.5 seconds before rescheduling the request.
+    std::this_thread::sleep_for(std::chrono::milliseconds(500));
     // Release the request first, as the testing backend may be configured to
     // receive an error on request release; in such a case, the error will be
     // propagated back through an error response.
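
This sleep is what makes the race in test_reschedule_error deterministic: the backend holds each request for 0.5 s before releasing it with the RESCHEDULE flag, while the test's 200 ms idle timeout lets the sequence batcher terminate the sequence first, so the rescheduled request looks like a new sequence without the START flag. Below is a minimal, hypothetical client sketch of how that error surfaces over the streaming gRPC API; the tensor name, shape, and dtype for the test model are assumptions, and the real flow lives in test_grpc_stream.

    import numpy as np
    import tritonclient.grpc as grpcclient

    results = []

    def callback(result, error):
        # Stream-level errors (e.g. the START-flag rejection) arrive here.
        results.append((result, error))

    with grpcclient.InferenceServerClient("localhost:8001") as client:
        client.start_stream(callback=callback)
        # Input name/shape/dtype are assumptions about the test model.
        inp = grpcclient.InferInput("INPUT", [1, 1], "INT32")
        inp.set_data_from_numpy(np.array([[3]], dtype=np.int32))
        client.async_stream_infer(
            "generative_sequence",
            inputs=[inp],
            sequence_id=1,
            sequence_start=True,
        )
        client.stop_stream()  # outstanding responses reach the callback first

    for _, error in results:
        if error is not None:
            # Expected to mention the missing START flag once the idle
            # timeout has terminated the original sequence.
            print(error)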
