Skip to content

Commit e0dac5d

Browse files
authored
Add pylibcudf.null_mask.null_count (#17711)
A small step toward decoupling `null_count` from `cudf._lib.column.Column`. Authors: Matthew Roeschke (https://github.com/mroeschke), Vyas Ramasubramani (https://github.com/vyasr). Approvers: Matthew Murray (https://github.com/Matt711). URL: #17711
1 parent 41215e2 commit e0dac5d

File tree

5 files changed

+39
-19
lines changed

5 files changed

+39
-19
lines changed

python/cudf/cudf/_lib/column.pxd

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ from pylibcudf.libcudf.column.column_view cimport (
1313
from pylibcudf.libcudf.types cimport size_type
1414
from rmm.librmm.device_buffer cimport device_buffer
1515

16-
cdef dtype_from_lists_column_view(column_view cv)
1716
cdef dtype_from_column_view(column_view cv)
1817

1918
cdef class Column:
@@ -42,5 +41,3 @@ cdef class Column:
4241

4342
@staticmethod
4443
cdef Column from_column_view(column_view, object)
45-
46-
cdef size_type compute_null_count(self) except? 0

python/cudf/cudf/_lib/column.pyx

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ from pylibcudf.libcudf.column.column_factories cimport (
4343
)
4444
from pylibcudf.libcudf.column.column_view cimport column_view
4545
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
46-
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
4746
from pylibcudf.libcudf.scalar.scalar cimport scalar
4847

4948
from cudf._lib.scalar cimport DeviceScalar
@@ -346,7 +345,15 @@ cdef class Column:
346345
@property
347346
def null_count(self):
348347
if self._null_count is None:
349-
self._null_count = self.compute_null_count()
348+
if not self.nullable or self.size == 0:
349+
self._null_count = 0
350+
else:
351+
with acquire_spill_lock():
352+
self._null_count = pylibcudf.null_mask.null_count(
353+
self.base_mask.get_ptr(mode="read"),
354+
self.offset,
355+
self.offset + self.size
356+
)
350357
return self._null_count
351358

352359
@property
@@ -410,18 +417,6 @@ cdef class Column:
410417
else:
411418
return other_col
412419

413-
cdef libcudf_types.size_type compute_null_count(self) except? 0:
414-
with acquire_spill_lock():
415-
if not self.nullable:
416-
return 0
417-
return cpp_null_count(
418-
<libcudf_types.bitmask_type*><uintptr_t>(
419-
self.base_mask.get_ptr(mode="read")
420-
),
421-
self.offset,
422-
self.offset + self.size
423-
)
424-
425420
cdef mutable_column_view mutable_view(self) except *:
426421
if isinstance(self.dtype, cudf.CategoricalDtype):
427422
col = self.base_children[0]

python/pylibcudf/pylibcudf/null_mask.pxd

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22

33
from pylibcudf.libcudf.types cimport mask_state, size_type
44

@@ -16,3 +16,5 @@ cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *)
1616
cpdef tuple bitmask_and(list columns)
1717

1818
cpdef tuple bitmask_or(list columns)
19+
20+
cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop)

python/pylibcudf/pylibcudf/null_mask.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,4 @@ def create_null_mask(
1212
) -> DeviceBuffer: ...
1313
def bitmask_and(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
1414
def bitmask_or(columns: list[Column]) -> tuple[DeviceBuffer, int]: ...
15+
def null_count(bitmask: int, start: int, stop: int) -> int: ...

python/pylibcudf/pylibcudf/null_mask.pyx

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1-
# Copyright (c) 2024, NVIDIA CORPORATION.
1+
# Copyright (c) 2024-2025, NVIDIA CORPORATION.
22

33
from libcpp.memory cimport make_unique
44
from libcpp.pair cimport pair
55
from libcpp.utility cimport move
66
from pylibcudf.libcudf cimport null_mask as cpp_null_mask
77
from pylibcudf.libcudf.types cimport mask_state, size_type
8+
from pylibcudf.utils cimport int_to_bitmask_ptr
89

910
from rmm.librmm.device_buffer cimport device_buffer
1011
from rmm.pylibrmm.device_buffer cimport DeviceBuffer
@@ -20,6 +21,7 @@ __all__ = [
2021
"bitmask_or",
2122
"copy_bitmask",
2223
"create_null_mask",
24+
"null_count",
2325
]
2426

2527
cdef DeviceBuffer buffer_to_python(device_buffer buf):
@@ -148,3 +150,26 @@ cpdef tuple bitmask_or(list columns):
148150
c_result = cpp_null_mask.bitmask_or(c_table.view())
149151

150152
return buffer_to_python(move(c_result.first)), c_result.second
153+
154+
155+
cpdef size_type null_count(Py_ssize_t bitmask, size_type start, size_type stop):
156+
"""Given a validity bitmask, counts the number of null elements.
157+
158+
For details, see :cpp:func:`null_count`.
159+
160+
Parameters
161+
----------
162+
bitmask : int
163+
Integer pointer to the bitmask.
164+
start : int
165+
Index of the first bit to count (inclusive).
166+
stop : int
167+
Index one past the last bit to count (exclusive).
168+
169+
Returns
170+
-------
171+
int
172+
The number of null elements in the specified range.
173+
"""
174+
with nogil:
175+
return cpp_null_mask.null_count(int_to_bitmask_ptr(bitmask), start, stop)

0 commit comments

Comments
 (0)