
Commit 23e7d78

Tabrizian authored and oandreeva-nv committed
Add testing for GPU tensor error handling (#5871)
* Add testing for GPU tensor error handling
* Fix up
* Remove exit 0
* Fix jetson
* Fix up
1 parent f9a80d4 commit 23e7d78

File tree

2 files changed (+81, -1 lines)


qa/L0_backend_python/python_test.py (+73 lines)
```diff
@@ -40,6 +40,8 @@
 from tritonclient.utils import *
 import tritonclient.http as httpclient
 
+TEST_JETSON = bool(int(os.environ.get('TEST_JETSON', 0)))
+
 
 class PythonTest(tu.TestResultCollector):
 
@@ -59,6 +61,14 @@ def _infer_help(self, model_name, shape, data_type):
         output0 = result.as_numpy('OUTPUT0')
         self.assertTrue(np.all(input_data_0 == output0))
 
+    def _create_cuda_region(self, client, size, name):
+        import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+        shm0_handle = cuda_shared_memory.create_shared_memory_region(
+            name, byte_size=size, device_id=0)
+        client.register_cuda_shared_memory(
+            name, cuda_shared_memory.get_raw_handle(shm0_handle), 0, size)
+        return shm0_handle
+
     def _optional_input_infer(self, model_name, has_input0, has_input1):
         with httpclient.InferenceServerClient("localhost:8000") as client:
             shape = (1,)
@@ -144,6 +154,69 @@ def test_growth_error(self):
             with self._shm_leak_detector.Probe() as shm_probe:
                 self._infer_help(model_name, shape, dtype)
 
+    # GPU tensors are not supported on jetson
+    # CUDA Shared memory is not supported on jetson
+    if not TEST_JETSON:
+
+        def test_gpu_tensor_error(self):
+            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+            model_name = 'identity_bool'
+            with httpclient.InferenceServerClient("localhost:8000") as client:
+                input_data = np.array([[True] * 1000], dtype=bool)
+                inputs = [
+                    httpclient.InferInput("INPUT0", input_data.shape,
+                                          np_to_triton_dtype(input_data.dtype))
+                ]
+                inputs[0].set_data_from_numpy(input_data)
+
+                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+
+                # intentionally create a shared memory region with not enough size.
+                client.unregister_cuda_shared_memory()
+                shm0_handle = self._create_cuda_region(client, 1,
+                                                       'output0_data')
+
+                requested_outputs[0].set_shared_memory('output0_data', 1)
+                with self.assertRaises(InferenceServerException) as ex:
+                    client.infer(model_name, inputs, outputs=requested_outputs)
+                self.assertIn(
+                    "should be at least 1000 bytes to hold the results",
+                    str(ex.exception))
+                client.unregister_cuda_shared_memory()
+                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
+        def test_dlpack_tensor_error(self):
+            import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
+            model_name = 'dlpack_identity'
+            with httpclient.InferenceServerClient("localhost:8000") as client:
+                input_data = np.array([[1] * 1000], dtype=np.float32)
+                inputs = [
+                    httpclient.InferInput("INPUT0", input_data.shape,
+                                          np_to_triton_dtype(input_data.dtype))
+                ]
+
+                requested_outputs = [httpclient.InferRequestedOutput('OUTPUT0')]
+                input_data_size = input_data.itemsize * input_data.size
+                client.unregister_cuda_shared_memory()
+                input_region = self._create_cuda_region(client, input_data_size,
+                                                        'input0_data')
+                inputs[0].set_shared_memory('input0_data', input_data_size)
+                cuda_shared_memory.set_shared_memory_region(
+                    input_region, [input_data])
+
+                # Intentionally create a small region to trigger an error
+                shm0_handle = self._create_cuda_region(client, 1,
+                                                       'output0_data')
+                requested_outputs[0].set_shared_memory('output0_data', 1)
+
+                with self.assertRaises(InferenceServerException) as ex:
+                    client.infer(model_name, inputs, outputs=requested_outputs)
+                self.assertIn(
+                    "should be at least 4000 bytes to hold the results",
+                    str(ex.exception))
+                client.unregister_cuda_shared_memory()
+                cuda_shared_memory.destroy_shared_memory_region(shm0_handle)
+
     def test_async_infer(self):
         model_name = "identity_uint8"
         request_parallelism = 4
```
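For readers unfamiliar with the CUDA shared-memory client API these tests rely on, the flow in test_gpu_tensor_error boils down to the standalone sketch below. It is a minimal version, assuming a running Triton server on localhost:8000 with the identity_bool model loaded and a CUDA-capable GPU at device 0; error handling is reduced to printing the exception.

```python
# A minimal, standalone sketch of the client-side flow exercised by
# test_gpu_tensor_error above. It assumes a Triton server on localhost:8000
# serving the identity_bool model and a CUDA-capable GPU visible as device 0.
import numpy as np
import tritonclient.http as httpclient
import tritonclient.utils.cuda_shared_memory as cuda_shared_memory
from tritonclient.utils import InferenceServerException, np_to_triton_dtype

with httpclient.InferenceServerClient("localhost:8000") as client:
    input_data = np.array([[True] * 1000], dtype=bool)
    inputs = [
        httpclient.InferInput("INPUT0", input_data.shape,
                              np_to_triton_dtype(input_data.dtype))
    ]
    inputs[0].set_data_from_numpy(input_data)
    outputs = [httpclient.InferRequestedOutput("OUTPUT0")]

    # Register a deliberately undersized (1-byte) CUDA shared-memory region
    # and ask the server to place OUTPUT0 into it.
    client.unregister_cuda_shared_memory()
    handle = cuda_shared_memory.create_shared_memory_region(
        "output0_data", byte_size=1, device_id=0)
    client.register_cuda_shared_memory(
        "output0_data", cuda_shared_memory.get_raw_handle(handle), 0, 1)
    outputs[0].set_shared_memory("output0_data", 1)

    # The 1000-element BOOL output needs 1000 bytes, so the server is expected
    # to reject the request rather than write past the end of the region.
    try:
        client.infer("identity_bool", inputs, outputs=outputs)
    except InferenceServerException as err:
        print(err)  # e.g. "... should be at least 1000 bytes to hold the results"

    client.unregister_cuda_shared_memory()
    cuda_shared_memory.destroy_shared_memory_region(handle)
```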

qa/L0_backend_python/test.sh (+8, -1 lines)
```diff
@@ -53,7 +53,7 @@ SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=524
 PYTHON_BACKEND_BRANCH=$PYTHON_BACKEND_REPO_TAG
 CLIENT_PY=./python_test.py
 CLIENT_LOG="./client.log"
-EXPECTED_NUM_TESTS="9"
+EXPECTED_NUM_TESTS="11"
 TEST_RESULT_FILE='test_results.txt'
 SERVER_LOG="./inference_server.log"
 source ../common/util.sh
@@ -128,9 +128,16 @@ mkdir -p models/string_fixed/1/
 cp ../python_models/string_fixed/model.py ./models/string_fixed/1/
 cp ../python_models/string_fixed/config.pbtxt ./models/string_fixed
 
+mkdir -p models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/model.py ./models/dlpack_identity/1/
+cp ../python_models/dlpack_identity/config.pbtxt ./models/dlpack_identity
+
 # Skip torch install on Jetson since it is already installed.
 if [ "$TEST_JETSON" == "0" ]; then
     pip3 install torch==1.13.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
+else
+    # GPU tensor tests are disabled on jetson
+    EXPECTED_NUM_TESTS=9
 fi
 
 prev_num_pages=`get_shm_pages`
```
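The test.sh change also stages a dlpack_identity model, whose model.py is not part of this diff. As a rough orientation only, a Python-backend identity model that passes tensors through via DLPack (so GPU tensors never round-trip through NumPy) might look like the hypothetical sketch below; the names and structure are assumptions, not the actual contents of python_models/dlpack_identity.

```python
# Hypothetical sketch of a DLPack-based identity model for the Triton Python
# backend. The real python_models/dlpack_identity/model.py is not shown in
# this commit, so treat this as an illustration of the idea only.
import triton_python_backend_utils as pb_utils


class TritonPythonModel:

    def execute(self, requests):
        responses = []
        for request in requests:
            input_tensor = pb_utils.get_input_tensor_by_name(request, "INPUT0")
            # Re-wrap the input buffer as OUTPUT0 via DLPack instead of
            # copying through NumPy, so a GPU tensor stays on the device.
            output_tensor = pb_utils.Tensor.from_dlpack(
                "OUTPUT0", input_tensor.to_dlpack())
            responses.append(
                pb_utils.InferenceResponse(output_tensors=[output_tensor]))
        return responses
```

With a pass-through like this, the output is exactly the size of the input (4000 bytes for 1000 float32 values), which is why test_dlpack_tensor_error expects the "should be at least 4000 bytes" error when it registers a 1-byte output region.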
