Commit 345023e
More tests and clarifications
1 parent 8d96d09

1 file changed: qa/python_models/dlpack_test/model.py (+39 -7 lines)

@@ -176,6 +176,8 @@ def test_bool_datatype(self):
             np.array_equal(bool_array, bool_tensor_dlpack.as_numpy()))
 
     def test_cuda_multi_stream(self):
+        # Test that an external stream syncs with the default stream
+        # and that pb_tensor holds the proper data
         s1 = torch.cuda.Stream()
         size = 5000
         pytorch_tensor = torch.tensor([0,0,0,0], device='cuda')
@@ -192,11 +194,13 @@ def test_cuda_multi_stream(self):
         self.assertTrue(torch.equal(pytorch_tensor_dlpack, expected_output))
 
     def test_cuda_non_blocking_multi_stream(self):
-        s1 = cp.cuda.Stream(non_blocking=True)
+        # Test that an external non-blocking stream syncs with the default
+        # stream and that pb_tensor holds the proper data
+        non_blocking_stream = cp.cuda.Stream(non_blocking=True)
         size = 5000
         cupy_tensor = cp.array([0,0,0,0])
         expected_output = cp.array([2,2,2,2])
-        with s1:
+        with non_blocking_stream:
             matrix_a = cp.random.rand(size,size)
             res = cp.matmul(matrix_a,matrix_a)
             for _ in range(1000):
@@ -205,16 +209,42 @@ def test_cuda_non_blocking_multi_stream(self):
 
         pb_tensor = pb_utils.Tensor.from_dlpack('tensor', cupy_tensor)
         # Verify that non-blocking stream has no pending jobs left
-        self.assertTrue(s1.done)
+        self.assertTrue(non_blocking_stream.done)
         cupy_tensor_dlpack = cp.from_dlpack(pb_tensor)
         self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output))
         self.assertFalse(pb_tensor.is_cpu())
         self.assertEqual(
             pb_tensor.__dlpack_device__(), cupy_tensor.__dlpack_device__())
 
     def test_cuda_non_blocking_multi_gpu(self):
+        # Test that pb_tensor is created on a non-default device
         size = 5000
         expected_output = cp.array([2,2,2,2])
+        expected_dlpack_device = (2, 1)  # DLDeviceType::kDLCUDA, device_id 1
+        with cp.cuda.Device(1):
+            non_blocking_stream = cp.cuda.Stream(non_blocking=True)
+            with non_blocking_stream:
+                cupy_tensor = cp.array([0,0,0,0])
+                matrix_a = cp.random.rand(size,size)
+                res = cp.matmul(matrix_a,matrix_a)
+                for _ in range(1000):
+                    res = cp.matmul(res,matrix_a)
+                cupy_tensor += cp.array([2,2,2,2])
+        with cp.cuda.Device(0):
+            pb_tensor = pb_utils.Tensor.from_dlpack('tensor', cupy_tensor)
+            cupy_tensor_dlpack = cp.from_dlpack(pb_tensor)
+
+        self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output))
+        self.assertFalse(pb_tensor.is_cpu())
+        self.assertEqual(pb_tensor.__dlpack_device__(), expected_dlpack_device)
+        self.assertEqual(
+            pb_tensor.__dlpack_device__(), cupy_tensor.__dlpack_device__())
+
+    def test_cuda_multi_gpu(self):
+        # Test that pb_tensor is created on a non-default device
+        size = 5000
+        expected_output = cp.array([2,2,2,2])
+        expected_dlpack_device = (2, 1)  # DLDeviceType::kDLCUDA, device_id 1
         with cp.cuda.Device(1):
             cupy_tensor = cp.array([0,0,0,0])
             matrix_a = cp.random.rand(size,size)
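
Side note on the behaviour the stream tests above rely on: CuPy work queued on a non-blocking stream is not implicitly ordered with the default stream, so a DLPack consumer such as pb_utils.Tensor.from_dlpack has to wait for the producer's pending work before reading. Below is a minimal, self-contained sketch of that contract using only CuPy; the explicit synchronize() stands in for whatever synchronization from_dlpack performs internally, and sizes and values are illustrative, not taken from the tests.

import cupy as cp

# Queue long-running work plus the update we care about on a
# non-blocking stream; the default stream will not wait for it.
stream = cp.cuda.Stream(non_blocking=True)
cupy_tensor = cp.array([0, 0, 0, 0])
with stream:
    matrix_a = cp.random.rand(512, 512)
    res = cp.matmul(matrix_a, matrix_a)
    cupy_tensor += cp.array([2, 2, 2, 2])

# A consumer must order its reads after the producer's stream.
# Synchronizing explicitly here mimics the state the tests assert
# via `non_blocking_stream.done` after from_dlpack returns.
stream.synchronize()
assert stream.done  # no pending jobs left on the stream
assert cp.array_equal(cupy_tensor, cp.array([2, 2, 2, 2]))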
@@ -225,10 +255,12 @@ def test_cuda_non_blocking_multi_gpu(self):
         with cp.cuda.Device(0):
             pb_tensor = pb_utils.Tensor.from_dlpack('tensor', cupy_tensor)
             cupy_tensor_dlpack = cp.from_dlpack(pb_tensor)
-        self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output))
-        self.assertFalse(pb_tensor.is_cpu())
-        self.assertEqual(
-            pb_tensor.__dlpack_device__(), cupy_tensor.__dlpack_device__())
+
+        self.assertTrue(cp.array_equal(cupy_tensor_dlpack, expected_output))
+        self.assertFalse(pb_tensor.is_cpu())
+        self.assertEqual(pb_tensor.__dlpack_device__(), expected_dlpack_device)
+        self.assertEqual(
+            pb_tensor.__dlpack_device__(), cupy_tensor.__dlpack_device__())
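
For reference on the `expected_dlpack_device = (2, 1)` constant introduced in the multi-GPU tests: `__dlpack_device__()` reports a `(device_type, device_id)` pair, and `DLDeviceType::kDLCUDA` is 2 in the DLPack specification. The following CuPy-only sketch illustrates the convention; it assumes a machine with at least two GPUs, and the variable names are illustrative.

import cupy as cp

# Allocate on GPU 1; DLPack reports placement as (device_type, device_id).
with cp.cuda.Device(1):
    producer = cp.array([2, 2, 2, 2])
assert tuple(producer.__dlpack_device__()) == (2, 1)  # kDLCUDA == 2

# Importing the tensor while GPU 0 is current must not move the data:
# the round-tripped array still reports the producer's device, which is
# what the tests check for pb_tensor as well.
with cp.cuda.Device(0):
    consumer = cp.from_dlpack(producer)
assert tuple(consumer.__dlpack_device__()) == (2, 1)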