@@ -257,7 +257,7 @@ PbTensor::DLPack(const py::object& stream)
   // Here external tensor requests PbTensor's `__dlpack__` method to provide
   // a PyCapsule. By the design of PbTensor, in a GPU case no pending work
   // is scheduled to work with PbTensor's data and we can simply pass
-  // the capsule.
+  // the capsule without synchronization.
   return this->ToDLPack();
 }
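For context on the hunk above: `ToDLPack()` is expected to wrap a `DLManagedTensor` in a PyCapsule following the DLPack convention, so handing the capsule back without a sync is safe only because no pending GPU work touches PbTensor's buffer. A minimal sketch of that producer-side handshake, assuming a pybind11 capsule and a hypothetical `MakeDLPackCapsule` helper (not the actual `ToDLPack` implementation):

#include <dlpack/dlpack.h>
#include <pybind11/pybind11.h>

namespace py = pybind11;

// Wrap an already-built DLManagedTensor in a capsule named "dltensor".
// A consumer renames the capsule to "used_dltensor" once it takes ownership,
// so the destructor below only fires if the tensor was never consumed.
py::capsule
MakeDLPackCapsule(DLManagedTensor* dl_managed_tensor)
{
  return py::capsule(dl_managed_tensor, "dltensor", [](PyObject* capsule) {
    if (PyCapsule_IsValid(capsule, "dltensor")) {
      auto* dlmt = static_cast<DLManagedTensor*>(
          PyCapsule_GetPointer(capsule, "dltensor"));
      if (dlmt != nullptr && dlmt->deleter != nullptr) {
        dlmt->deleter(dlmt);
      }
    }
  });
}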
@@ -339,14 +339,6 @@ PbTensor::FromDLPack(const std::string& name, const py::object& tensor)
   if (py::isinstance<py::capsule>(tensor)) {
     return FromDLPackCapsule(name, tensor);
   } else if (py::hasattr(tensor, "__dlpack__")) {
-#ifdef TRITON_ENABLE_GPU
-    cudaError_t err = cudaStreamSynchronize(0);
-    if (err != cudaSuccess) {
-      throw PythonBackendException(
-          "Failed to syncronize on the default stream before\
-          dlpack capsule consumption.");
-    }
-#endif
     // Array API requirements for the stream argument:
     // stream = None, producer must assume the legacy default stream,
     // stream = -1 is a signal for the producer not to perform any
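The comment above enumerates the array-API contract for the `stream` argument of `__dlpack__`. A hedged CUDA sketch of how a producer can honor that contract without the host-blocking `cudaStreamSynchronize(0)` removed in this hunk, by recording an event on the producer's stream and making the consumer's stream wait on it; the `producer_stream` parameter and the simplified handling of the special integer values are assumptions for illustration:

#ifdef TRITON_ENABLE_GPU
#include <cuda_runtime_api.h>

#include <cstdint>
#include <optional>

// Make the producer's pending work visible to the consumer's stream.
// stream = -1      -> consumer asked for no synchronization at all.
// stream = nullopt -> Python None, treated as the legacy default stream.
// other values     -> consumer-owned stream handle (simplified here).
void
MakeDataAvailableOnConsumerStream(
    cudaStream_t producer_stream, std::optional<int64_t> consumer_stream_arg)
{
  if (consumer_stream_arg.has_value() && *consumer_stream_arg == -1) {
    return;
  }

  cudaStream_t consumer_stream =
      consumer_stream_arg.has_value()
          ? reinterpret_cast<cudaStream_t>(*consumer_stream_arg)
          : cudaStream_t{};  // legacy default stream

  // Event-based ordering instead of a host-blocking synchronize.
  cudaEvent_t event;
  cudaEventCreateWithFlags(&event, cudaEventDisableTiming);
  cudaEventRecord(event, producer_stream);
  cudaStreamWaitEvent(consumer_stream, event, 0);
  cudaEventDestroy(event);
}
#endif  // TRITON_ENABLE_GPU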
@@ -373,20 +365,6 @@ std::shared_ptr<PbTensor>
 PbTensor::FromDLPackCapsule(
     const std::string& name, const py::capsule& dlpack_tensor)
 {
-
-  // TO-DO ADD sync on the default stream either here for all apis
-  // or in __dlpack__ case for only new apis. Write tests and think about
-  // different contexts.
-
-#ifdef TRITON_ENABLE_GPU
-  cudaError_t err = cudaStreamSynchronize(0);
-  if (err != cudaSuccess) {
-    throw PythonBackendException(
-        "Failed to syncronize on the default stream before\
-        dlpack capsule consumption.");
-  }
-#endif
-
   DLManagedTensor* dl_managed_tensor =
       static_cast<DLManagedTensor*>(dlpack_tensor.get_pointer());
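The cast above pulls the `DLManagedTensor` out of the capsule. For reference, the full DLPack capsule-consumption handshake a consumer is expected to perform looks roughly like the sketch below; the helper name and the use of `std::runtime_error` instead of `PythonBackendException` are illustrative assumptions:

#include <dlpack/dlpack.h>
#include <pybind11/pybind11.h>

#include <stdexcept>

namespace py = pybind11;

// Take ownership of the tensor held by a "dltensor" capsule and mark the
// capsule as consumed so the producer's destructor does not free it twice.
DLManagedTensor*
ConsumeDLPackCapsule(const py::capsule& dlpack_tensor)
{
  auto* dl_managed_tensor =
      static_cast<DLManagedTensor*>(dlpack_tensor.get_pointer());
  if (dl_managed_tensor == nullptr) {
    throw std::runtime_error("Capsule does not contain a DLManagedTensor.");
  }

  // After this rename the consumer is responsible for eventually calling
  // dl_managed_tensor->deleter(dl_managed_tensor).
  PyCapsule_SetName(dlpack_tensor.ptr(), "used_dltensor");
  return dl_managed_tensor;
}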