
Commit de03484

Add testing for GPU tensor error handling
1 parent 8149a32 commit de03484

2 files changed, 71 insertions(+), 1 deletion(-)

qa/L0_backend_python/python_test.py (+67, -1)
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 
-# Copyright 2019-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -38,6 +38,7 @@
 import os
 
 from tritonclient.utils import *
+import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
 import tritonclient.http as httpclient
 
 TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
@@ -61,6 +62,13 @@ def _infer_help(self, model_name, shape, data_type):
             output0 = result.as_numpy('OUTPUT0')
             self.assertTrue(np.all(input_data_0 == output0))
 
+    def _create_cuda_region(self, client, size, name):
+        shm0_handle = cuda_shared_memory.create_shared_memory_region(
+            name, byte_size=size, device_id=0)
+        client.register_cuda_shared_memory(
+            name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
+        return shm0_handle
+
     def _optional_input_infer(self, model_name, has_input0, has_input1):
         with httpclient.InferenceServerClient("localhost:8000") as client:
             shape = (1,)
@@ -151,6 +159,64 @@ def test_growth_error(self):
         with self._shm_leak_detector.Probe() as shm_probe:
             self._infer_help(model_name, shape, dtype)
 
+    # CUDA shared memory is not supported on Jetson.
+    def test_gpu_tensor_error(self):
+        model_name = 'identity_bool'
+        with httpclient.InferenceServerClient("localhost:8000") as client:
+            input_data = np.array([[True] * 1000], dtype=bool)
+            inputs = [
+                httpclient.InferInput("INPUT0", input_data.shape,
+                                      np_to_triton_dtype(input_data.dtype))
+            ]
+            inputs[0].set_data_from_numpy(input_data)
+
+            requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+
+            # Intentionally create a shared memory region that is too small.
+            client.unregister_cuda_shared_memory()
+            shm0_handle = self._create_cuda_region(client, 1,
+                                                   'output0_data')
+
+            requested_outputs[0].set_shared_memory('output0_data', 1)
+            with self.assertRaises(InferenceServerException) as ex:
+                client.infer(model_name, inputs, outputs=requested_outputs)
+            self.assertIn(
+                "should be at least 1000 bytes to hold the results",
+                str(ex.exception))
+            client.unregister_cuda_shared_memory()
+            cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
+    def test_dlpack_tensor_error(self):
+        model_name = 'dlpack_identity'
+        with httpclient.InferenceServerClient("localhost:8000") as client:
+            input_data = np.array([[1] * 1000], dtype=np.float32)
+            inputs = [
+                httpclient.InferInput("INPUT0", input_data.shape,
+                                      np_to_triton_dtype(input_data.dtype))
+            ]
+
+            requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+            input_data_size = input_data.itemsize * input_data.size
+            client.unregister_cuda_shared_memory()
+            input_region = self._create_cuda_region(client, input_data_size,
+                                                    'input0_data')
+            inputs[0].set_shared_memory('input0_data', input_data_size)
+            cuda_shared_memory.set_shared_memory_region(
+                input_region, [input_data])
+
+            # Intentionally create a small output region to trigger an error.
+            shm0_handle = self._create_cuda_region(client, 1,
+                                                   'output0_data')
+            requested_outputs[0].set_shared_memory('output0_data', 1)
+
+            with self.assertRaises(InferenceServerException) as ex:
+                client.infer(model_name, inputs, outputs=requested_outputs)
+            self.assertIn(
+                "should be at least 4000 bytes to hold the results",
+                str(ex.exception))
+            client.unregister_cuda_shared_memory()
+            cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
     def test_async_infer(self):
         model_name = "identity_uint8"
         request_parallelism = 4
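
Both new tests deliberately register a 1-byte CUDA shared memory region for the output so that the server rejects the request and reports the required size. For reference, a minimal sketch of the same client-side flow with a correctly sized region, using only the tritonclient calls the tests already exercise, might look like the following (the model name and shapes mirror test_gpu_tensor_error; the snippet is illustrative and not part of the commit):

import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
from tritonclient.utils import np_to_triton_dtype

client = httpclient.InferenceServerClient("localhost:8000")

input_data = np.array([[True] * 1000], dtype=bool)
byte_size = input_data.itemsize * input_data.size  # 1000 bytes for 1000 BOOL elements

# Allocate a CUDA shared memory region on GPU 0 and register it with the server.
shm_handle = cuda_shared_memory.create_shared_memory_region(
    "output0_data", byte_size=byte_size, device_id=0)
client.register_cuda_shared_memory(
    "output0_data", cuda_shared_memory.get_raw_handle(shm_handle), 0, byte_size)

inputs = [
    httpclient.InferInput("INPUT0", input_data.shape,
                          np_to_triton_dtype(input_data.dtype))
]
inputs[0].set_data_from_numpy(input_data)

outputs = [httpclient.InferRequestedOutput("OUTPUT0")]
outputs[0].set_shared_memory("output0_data", byte_size)

# With a large enough region this call succeeds; the undersized regions in the
# tests above make the same call raise InferenceServerException instead.
client.infer("identity_bool", inputs, outputs=outputs)
output0 = cuda_shared_memory.get_contents_as_numpy(
    shm_handle, np.bool_, input_data.shape)

client.unregister_cuda_shared_memory()
cuda_shared_memory.destroy_shared_memory_region(shm_handle)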

qa/L0_backend_python/test.sh (+4)

@@ -128,6 +128,10 @@ mkdir -p models/string_fixed/1/
 cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
 cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
 
+mkdir -p models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity
+
 # Skip torch install on Jetson since it is already installed.
 if [ "$TEST_JETSON" == "0" ]; then
     pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
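
The test.sh change stages the dlpack_identity model from ../python_models/dlpack_identity, but its model.py is not part of this diff. As a hypothetical sketch only (the real file may differ), a DLPack-based identity model for the Python backend could be as simple as:

# Hypothetical sketch of a DLPack identity model; not the file copied above.
import triton_python_backend_utils as pb_utils


class TritonPythonModel:

    def execute(self, requests):
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            # Re-wrap the input's DLPack capsule as the output tensor so the
            # data passes through without an extra copy inside the model.
            output_tensor = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", input_tensor.to_dlpack())
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses

test_dlpack_tensor_error sends 1000 FP32 elements through such a model, which is why the server asks for an output region of at least 4000 bytes (1000 elements x 4 bytes each).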
