
Commit ecf1888

Removing synch on default stream since it is not needed
1 parent 0091ce5 commit ecf1888

File tree

2 files changed (+2 −24):
  README.md
  src/pb_tensor.cc


README.md (+1 −1)
@@ -1223,7 +1223,7 @@ class TritonPythonModel:
     # tensor.
     input0 = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(pytorch_tensor))
 ```
-Starting from 23.04 release, Python backend allows tensors implementing
+Python backend allows tensors implementing
 [`__dlpack__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack__.html)
 and [`__dlpack_device__`](https://data-apis.org/array-api/2022.12/API_specification/generated/array_api.array.__dlpack_device__.html)
 [interface](https://dmlc.github.io/dlpack/latest/python_spec.html)
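For context on the README change above: `pb_utils.Tensor.from_dlpack` is what consumes these tensors inside a Python backend model. Below is a minimal sketch of the two call styles the edited sentence contrasts, assuming a CUDA-capable PyTorch install and code running inside a model's `execute()`; the tensor name "INPUT0" simply follows the README example.

```python
import torch
from torch.utils.dlpack import to_dlpack
import triton_python_backend_utils as pb_utils

pytorch_tensor = torch.tensor([1, 2, 3], device="cuda")

# Original style from the README example: convert to a DLPack capsule first.
input0 = pb_utils.Tensor.from_dlpack("INPUT0", to_dlpack(pytorch_tensor))

# Style described by the edited sentence: pass the tensor object directly,
# since it implements __dlpack__/__dlpack_device__ and the backend
# (PbTensor::FromDLPack in the pb_tensor.cc diff below) accepts such objects.
input0 = pb_utils.Tensor.from_dlpack("INPUT0", pytorch_tensor)
```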

src/pb_tensor.cc (+1 −23)
@@ -257,7 +257,7 @@ PbTensor::DLPack(const py::object& stream)
   // Here external tensor requests PbTensor's `__dlpack__` method to provide
   // a PyCapsule. By the design of PbTensor, in a GPU case no pending work
   // is scheduled to work with PbTensor's data and we can simply pass
-  // the capsule.
+  // the capsule without a synchronization.
   return this->ToDLPack();
 }

@@ -339,14 +339,6 @@ PbTensor::FromDLPack(const std::string& name, const py::object& tensor)
   if (py::isinstance<py::capsule>(tensor)) {
     return FromDLPackCapsule(name, tensor);
   } else if (py::hasattr(tensor, "__dlpack__")) {
-#ifdef TRITON_ENABLE_GPU
-    cudaError_t err = cudaStreamSynchronize(0);
-    if (err != cudaSuccess) {
-      throw PythonBackendException(
-          "Failed to syncronize on the default stream before\
-          dlpack capsule consumption.");
-    }
-#endif
     // Array API requirements for the stream argument:
     // stream = None, producer must assume the legacy default stream,
     // stream = -1 is a signal for the producer not to perform any
@@ -373,20 +365,6 @@ std::shared_ptr<PbTensor>
 PbTensor::FromDLPackCapsule(
     const std::string& name, const py::capsule& dlpack_tensor)
 {
-
-  // TO-DO ADD sync on the default stream either here for all apis
-  // or in __dlpack__ case for only new apis. Write tests and think about
-  // different contexts.
-
-#ifdef TRITON_ENABLE_GPU
-  cudaError_t err = cudaStreamSynchronize(0);
-  if (err != cudaSuccess) {
-    throw PythonBackendException(
-        "Failed to syncronize on the default stream before\
-        dlpack capsule consumption.");
-  }
-#endif
-
   DLManagedTensor* dl_managed_tensor =
       static_cast<DLManagedTensor*>(dlpack_tensor.get_pointer());

0 commit comments
