Commit f3610e4

fix: Fix memory leak with dlpack when using python Tensor objects (#421)
Co-authored-by: @tanmayv25
1 parent be3fa69 commit f3610e4

2 files changed: +153 −35 lines changed

python/test/test_api.py

Lines changed: 98 additions & 0 deletions
@@ -24,9 +24,17 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import asyncio
+import copy
+import gc
 import json
 import os
 import shutil
+import sys
+import time
+import unittest
+from collections import Counter
+from contextlib import contextmanager
 
 import numpy
 import pytest
@@ -296,6 +304,96 @@ def test_tensor_from_numpy(self):
         numpy.testing.assert_array_equal(torch_tensor.numpy(), cpu_array)
         assert torch_tensor.data_ptr() == cpu_array.ctypes.data
 
+    async def _tensor_from_numpy(self):
+        owner = numpy.ones(2**27)
+        tensor = tritonserver.Tensor.from_dlpack(owner)
+        array = numpy.from_dlpack(tensor)
+        del owner
+        del tensor
+        del array
+        await asyncio.sleep(0.1)
+
+    async def _async_test_runs(self):
+        tasks = []
+        for _ in range(100):
+            tasks.append(asyncio.create_task(self._tensor_from_numpy()))
+        try:
+            await asyncio.wait(tasks)
+        except Exception as e:
+            print(e)
+
+    @staticmethod
+    @contextmanager
+    def object_collector():
+        gc.collect()
+        objects_before = gc.get_objects()
+        yield
+        objects_after = gc.get_objects()
+        new_objects = [type(x) for x in objects_after[len(objects_before) :]]
+        tensor_objects = [
+            x for x in objects_after if isinstance(x, tritonserver.Tensor)
+        ]
+        if tensor_objects:
+            print("Tensor objects")
+            print(len(tensor_objects))
+            print(type(tensor_objects[-1].memory_buffer.owner))
+        print(
+            f"\nTotal Collected Objects ({len(new_objects)}) {Counter(new_objects)}"
+        )
+        assert len(tensor_objects) == 0, "Leaked Tensors"
+
+    def test_cpu_memory_leak_async(self):
+        with TestTensor.object_collector():
+            asyncio.run(self._async_test_runs())
+
+    def test_cpu_memory_leak_sync(self):
+        with TestTensor.object_collector():
+            for _ in range(100):
+                owner = numpy.ones(2**27)
+                tensor = tritonserver.Tensor.from_dlpack(owner)
+                array = numpy.from_dlpack(tensor)
+                del owner
+                del tensor
+                del array
+
+    @pytest.mark.skipif(cupy is None, reason="Skipping gpu memory, cupy not installed")
+    def test_gpu_memory_leak(self):
+        with TestTensor.object_collector():
+            for _ in range(100):
+                owner = cupy.ones(2**27)
+                tensor = tritonserver.Tensor.from_dlpack(owner)
+                array = cupy.from_dlpack(tensor)
+                del owner
+                del tensor
+                del array
+
+    def test_reference_counts(self):
+        with TestTensor.object_collector():
+            owner = numpy.ones(2**27)
+            owner_data = owner.ctypes.data
+            assert sys.getrefcount(owner) - 1 == 1, "Invalid Count"
+
+            tensor = tritonserver.Tensor.from_dlpack(owner)
+            assert sys.getrefcount(owner) - 1 == 2, "Invalid Count"
+            assert sys.getrefcount(tensor) - 1 == 1, "Invalid Count"
+            del owner
+
+            numpy_array = numpy.from_dlpack(tensor)
+            assert owner_data == numpy_array.ctypes.data
+            assert sys.getrefcount(tensor) - 1 == 2, "Invalid Count"
+            assert sys.getrefcount(numpy_array) - 1 == 1, "Invalid Count"
+
+            tensor.shape = [2, 2**26]
+
+            assert numpy_array.shape == (2**27,), "Invalid Shape"
+
+            numpy_array_2 = numpy.from_dlpack(tensor)
+            del tensor
+            assert owner_data == numpy_array.ctypes.data
+            assert numpy_array_2.shape == (2, 2**26)
+            del numpy_array
+            del numpy_array_2
+
 
 class TestServer:
     def test_not_started(self):
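The new tests all share one round-trip pattern: export an array into a tritonserver.Tensor via DLPack, re-import it, drop every reference, then assert no Tensor objects remain reachable. A minimal standalone sketch of that check (assuming the tritonserver package is importable; a small array stands in for the tests' 1 GiB numpy.ones(2**27) owners):

import gc

import numpy
import tritonserver

# Export -> import round trip, dropping every reference we hold.
for _ in range(100):
    owner = numpy.ones(1024)
    tensor = tritonserver.Tensor.from_dlpack(owner)
    array = numpy.from_dlpack(tensor)
    del owner, tensor, array

# With the fix, the capsule deleter releases the manager context,
# so no Tensor objects should survive the loop.
leaked = [o for o in gc.get_objects() if isinstance(o, tritonserver.Tensor)]
assert not leaked, f"Leaked {len(leaked)} Tensor objects"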

python/tritonserver/_api/_tensor.py

Lines changed: 55 additions & 35 deletions
@@ -217,23 +217,8 @@ def __dlpack__(self, *, stream=None):
 
         self._sync_on_requested_stream(stream)
 
-        dl_managed_tensor = Tensor._create_managed_tensor()
-        dl_managed_tensor.dl_tensor.data = self.data_ptr
-        dl_managed_tensor.dl_tensor.device = DLDevice(
-            TRITON_MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE[self.memory_type],
-            self.memory_type_id,
-        )
+        dl_managed_tensor = self._create_managed_tensor()
 
-        dl_managed_tensor.dl_tensor.dtype = TRITON_TO_DLPACK_DTYPE[self.data_type]
-        dl_managed_tensor.dl_tensor.ndim = len(self.shape)
-        dl_managed_tensor.dl_tensor.shape = (ctypes.c_int64 * len(self.shape))(
-            *self.shape
-        )
-        dl_managed_tensor.dl_tensor.strides = ctypes.POINTER(ctypes.c_int64)()
-        dl_managed_tensor.dl_tensor.byte_offset = 0
-        dl_managed_tensor.deleter = Tensor._managed_tensor_deleter
-
-        self._set_dlpack_manager_ctx(dl_managed_tensor)
         pycapsule = ctypes.pythonapi.PyCapsule_New(
             ctypes.byref(dl_managed_tensor),
             c_str_dltensor,
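For context, the consumer side of this producer code: numpy.from_dlpack() calls __dlpack__(), takes ownership of the "dltensor" capsule, and invokes dl_managed_tensor.deleter once the array no longer needs the memory. A short sketch of that flow (assuming the tritonserver package is importable):

import numpy
import tritonserver

owner = numpy.arange(4, dtype=numpy.float64)
tensor = tritonserver.Tensor.from_dlpack(owner)

# numpy.from_dlpack() consumes the capsule produced by __dlpack__();
# the view is zero-copy, as the test suite asserts via data pointers.
array = numpy.from_dlpack(tensor)
assert array.ctypes.data == tensor.data_ptr

# Dropping the Tensor is safe: the capsule's manager_ctx reference
# keeps it (and the owning numpy array) alive until the deleter runs.
del owner, tensor
print(array.sum())  # 6.0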
@@ -600,26 +585,39 @@ def _from_numpy(obj: numpy.ndarray | numpy.generic) -> Tensor:
             size=obj.itemsize * obj.size,
             owner=obj,
         )
-
         return Tensor(data_type, shape, memory_buffer)
 
-    @staticmethod
-    def _create_managed_tensor():
+    def _create_managed_tensor(self) -> DLManagedTensor:
+        # Allocates space for a managed tensor object
+        # and fills in the fields
+        #
+        # To ensure the lifetime of the managed tensor we create a
+        # context object that includes a newly created shape array and a
+        # reference to self
+
         size = ctypes.c_size_t(ctypes.sizeof(DLManagedTensor))
         address = ctypes.pythonapi.PyMem_RawMalloc(size)
-        return DLManagedTensor.from_address(address)
+        dl_managed_tensor = DLManagedTensor.from_address(address)
+        dl_managed_tensor.dl_tensor.data = self.data_ptr
+        dl_managed_tensor.dl_tensor.device = DLDevice(
+            TRITON_MEMORY_TYPE_TO_DLPACK_DEVICE_TYPE[self.memory_type],
+            self.memory_type_id,
+        )
+        dl_managed_tensor.dl_tensor.dtype = TRITON_TO_DLPACK_DTYPE[self.data_type]
+        dl_managed_tensor.dl_tensor.ndim = len(self.shape)
+        manager_ctx = _ManagerCtx(self)
+        dl_managed_tensor.dl_tensor.shape = manager_ctx.shape
+        dl_managed_tensor.dl_tensor.strides = manager_ctx.strides
+        dl_managed_tensor.dl_tensor.byte_offset = 0
+        dl_managed_tensor.deleter = Tensor._managed_tensor_deleter
+        dl_managed_tensor.manager_ctx = manager_ctx.reference()
+        return dl_managed_tensor
 
     @staticmethod
     @ctypes.CFUNCTYPE(None, ctypes.c_void_p)
     def _managed_tensor_deleter(handle: int) -> None:
         dl_managed_tensor = DLManagedTensor.from_address(handle)
-        tensor_obj_ptr = ctypes.cast(
-            dl_managed_tensor.manager_ctx, ctypes.POINTER(ctypes.py_object)
-        )
-        tensor_obj = tensor_obj_ptr.contents
-        ctypes.pythonapi.Py_DecRef(tensor_obj)
-        shape_obj = ctypes.py_object(dl_managed_tensor.dl_tensor.shape)
-        ctypes.pythonapi.Py_DecRef(shape_obj)
+        _ManagerCtx.release(dl_managed_tensor.manager_ctx)
         ctypes.pythonapi.PyMem_RawFree(handle)
 
     @staticmethod
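The manager context introduced here exists because dl_tensor.shape is a raw int64*: the ctypes array behind it is only valid while some Python object owns it. A toy illustration of the hazard, independent of the library (hypothetical helper names, not the library's code):

import ctypes

def dangling_shape_ptr():
    # The array is owned by this local; after return, the address
    # points at freed memory -- the bug class _ManagerCtx prevents.
    shape = (ctypes.c_int64 * 2)(2, 2**26)
    return ctypes.addressof(shape)

def anchored_shape_ptr():
    shape = (ctypes.c_int64 * 2)(2, 2**26)
    # Returning the owner alongside the address keeps the memory valid,
    # mirroring how _ManagerCtx stores the shape array for the lifetime
    # of the managed tensor.
    return ctypes.addressof(shape), shape

addr, keepalive = anchored_shape_ptr()
view = ctypes.cast(addr, ctypes.POINTER(ctypes.c_int64))
print(view[0], view[1])  # 2 67108864, valid while keepalive is referenced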
@@ -639,14 +637,36 @@ def _pycapsule_deleter(handle: ctypes.c_void_p) -> None:
             print(f"Exception occurred while deleting capsule: {e}")
             raise e
 
-    def _set_dlpack_manager_ctx(self, dl_managed_tensor):
-        tensor_obj = ctypes.py_object(self)
-        tensor_obj_ptr = ctypes.pointer(tensor_obj)
-        dl_managed_tensor.manager_ctx = ctypes.cast(tensor_obj_ptr, ctypes.c_void_p)
-        shape_obj = ctypes.py_object(dl_managed_tensor.dl_tensor.shape)
-        ctypes.pythonapi.Py_IncRef(tensor_obj)
-        ctypes.pythonapi.Py_IncRef(shape_obj)
-
     _from_converters: ClassVar[dict[type, Callable[[Any], Tensor]]] = dict(
         {numpy.ndarray: _from_numpy, numpy.generic: _from_numpy, list: _from_list},
     )
+
+
+class _ManagerCtx:
+    # To ensure the lifetime of the managed tensor we create a
+    # context object that includes a newly created shape array and a
+    # reference to self
+
+    def __init__(self, tensor: Tensor) -> None:
+        self._tensor = tensor
+        self.shape = (ctypes.c_int64 * len(tensor.shape))(*tensor.shape)
+        self.strides = ctypes.POINTER(ctypes.c_int64)()
+
+    def reference(self) -> ctypes.c_void_p:
+        py_obj = ctypes.py_object(self)
+        ctypes.pythonapi.Py_IncRef(py_obj)
+
+        # Note: Could not find a direct way to cast a python object
+        # to a c_void_p. The mechanism is to either use id(self) or
+        # cast as described here:
+        #
+        # https://groups.google.com/g/dev-python/c/QRRqVC7gkf4/m/zH7l1gTXBwAJ
+        #
+        # To avoid relying on the behavior of id() we use the casting mechanism
+
+        return ctypes.POINTER(ctypes.c_void_p)(py_obj)[0]
+
+    @staticmethod
+    def release(reference: ctypes.c_void_p) -> None:
+        py_obj = ctypes.cast(reference, ctypes.py_object)
+        ctypes.pythonapi.Py_DecRef(py_obj)
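The reference()/release() pair is the ctypes idiom for handing a Python object across a void* boundary: take a strong reference with Py_IncRef, reinterpret the PyObject* bits as a c_void_p, and later cast back and Py_DecRef. A standalone round-trip sketch of the same idiom (CPython-specific; the Ctx class is a toy stand-in for the manager context):

import ctypes

class Ctx:
    def __init__(self, payload):
        self.payload = payload

ctx = Ctx("keep me alive")
py_obj = ctypes.py_object(ctx)
ctypes.pythonapi.Py_IncRef(py_obj)  # the raw handle now owns a reference
handle = ctypes.POINTER(ctypes.c_void_p)(py_obj)[0]  # PyObject* as void*

del ctx, py_obj  # only the raw handle keeps the object alive

# Later (e.g. inside a DLPack deleter): recover the object, release it.
recovered = ctypes.cast(handle, ctypes.py_object).value
print(recovered.payload)  # keep me alive
ctypes.pythonapi.Py_DecRef(ctypes.py_object(recovered))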
