Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement exposed null mask APIs in pylibcudf #15908

Merged
merged 21 commits into from
Aug 30, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
90b8a13
Initial commit
charlesbluca Jun 3, 2024
2900c84
Get build and import passing
charlesbluca Jun 3, 2024
177ec9b
Merge remote-tracking branch 'upstream/branch-24.08' into pylibcudf-n…
charlesbluca Jun 3, 2024
e717782
Add copy_bitmask test
charlesbluca Jun 3, 2024
129f89b
Add bitmask_allocation_size_bytes test
charlesbluca Jun 3, 2024
893ba26
Merge remote-tracking branch 'upstream/branch-24.08' into pylibcudf-n…
charlesbluca Jun 4, 2024
cb56d50
Add create_null_mask test
charlesbluca Jun 4, 2024
48ec963
Merge remote-tracking branch 'upstream/branch-24.08' into pylibcudf-n…
charlesbluca Jun 13, 2024
fc4bf7e
Add docstrings
charlesbluca Jun 13, 2024
b9a1919
Add doc pages
charlesbluca Jun 13, 2024
b80b99e
Consolidate buffer wrapping logic with helper function
charlesbluca Jul 2, 2024
4a53d7b
Drop size_type cimport from python code
charlesbluca Jul 2, 2024
793549a
Merge remote-tracking branch 'upstream/branch-24.08' into pylibcudf-n…
charlesbluca Jul 2, 2024
926dddd
Add DeviceBuffer to _reftarget_aliases
charlesbluca Jul 2, 2024
975dc15
Add 'size_t' to _names_to_skip_in_pylibcudf
charlesbluca Jul 2, 2024
d7febde
Merge branch 'branch-24.08' into pylibcudf-null_mask
charlesbluca Jul 3, 2024
e9815a9
Merge branch 'branch-24.08' into pylibcudf-null_mask
charlesbluca Jul 8, 2024
ef3a1b5
Merge branch 'branch-24.08' into pylibcudf-null_mask
vyasr Aug 16, 2024
425dcae
Merge branch 'branch-24.10' into pylibcudf-null_mask
vyasr Aug 16, 2024
46954ee
Merge remote-tracking branch 'upstream/branch-24.10' into pylibcudf-n…
vyasr Aug 29, 2024
167de9f
Merge branch 'branch-24.10' into pylibcudf-null_mask
vyasr Aug 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ This page provides API documentation for pylibcudf.
join
lists
merge
null_mask
quantiles
reduce
reshape
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/api_docs/pylibcudf/null_mask.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
null_mask
=========

.. automodule:: cudf._lib.pylibcudf.null_mask
:members:
100 changes: 18 additions & 82 deletions python/cudf/cudf/_lib/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -1,38 +1,11 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from enum import Enum

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from cudf._lib import pylibcudf
from cudf._lib.pylibcudf.null_mask import MaskState
from cudf.core.buffer import acquire_spill_lock, as_buffer

from libcpp.memory cimport make_unique, unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

from cudf._lib.column cimport Column
from cudf._lib.pylibcudf.libcudf.column.column_view cimport column_view
from cudf._lib.pylibcudf.libcudf.null_mask cimport (
bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
bitmask_and as cpp_bitmask_and,
bitmask_or as cpp_bitmask_or,
copy_bitmask as cpp_copy_bitmask,
create_null_mask as cpp_create_null_mask,
underlying_type_t_mask_state,
)
from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view
from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type
from cudf._lib.utils cimport table_view_from_columns


class MaskState(Enum):
"""
Enum for null mask creation state
"""
UNALLOCATED = <underlying_type_t_mask_state> mask_state.UNALLOCATED
UNINITIALIZED = <underlying_type_t_mask_state> mask_state.UNINITIALIZED
ALL_VALID = <underlying_type_t_mask_state> mask_state.ALL_VALID
ALL_NULL = <underlying_type_t_mask_state> mask_state.ALL_NULL
from cudf._lib.pylibcudf.libcudf.types cimport size_type
charlesbluca marked this conversation as resolved.
Show resolved Hide resolved


@acquire_spill_lock()
Expand All @@ -44,15 +17,7 @@ def copy_bitmask(Column col):
if col.base_mask is None:
return None

cdef column_view col_view = col.view()
cdef device_buffer db
cdef unique_ptr[device_buffer] up_db

with nogil:
db = move(cpp_copy_bitmask(col_view))
up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
rmm_db = pylibcudf.null_mask.copy_bitmask(col.to_pylibcudf(mode="read"))
buf = as_buffer(rmm_db)
return buf

Expand All @@ -62,12 +27,7 @@ def bitmask_allocation_size_bytes(size_type num_bits):
Given a size, calculates the number of bytes that should be allocated for a
column validity mask
"""
cdef size_t output_size

with nogil:
output_size = cpp_bitmask_allocation_size_bytes(num_bits)

return output_size
return pylibcudf.null_mask.bitmask_allocation_size_bytes(num_bits)


def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
Expand All @@ -82,48 +42,24 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
state : ``MaskState``, default ``MaskState.UNINITIALIZED``
State the null mask should be created in
"""
if not isinstance(state, MaskState):
raise TypeError(
"`state` is required to be of type `MaskState`, got "
+ (type(state).__name__)
)

cdef device_buffer db
cdef unique_ptr[device_buffer] up_db
cdef mask_state c_mask_state = <mask_state>(
<underlying_type_t_mask_state>(state.value)
)

with nogil:
db = move(cpp_create_null_mask(size, c_mask_state))
up_db = move(make_unique[device_buffer](move(db)))

rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
rmm_db = pylibcudf.null_mask.create_null_mask(size, state)
buf = as_buffer(rmm_db)
return buf


@acquire_spill_lock()
def bitmask_and(columns: list):
cdef table_view c_view = table_view_from_columns(columns)
cdef pair[device_buffer, size_type] c_result
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_and(c_view))
up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
def bitmask_and(list columns):
rmm_db, other = pylibcudf.null_mask.bitmask_and(
[col.to_pylibcudf(mode="read") for col in columns]
)
buf = as_buffer(rmm_db)
return buf, other


@acquire_spill_lock()
def bitmask_or(columns: list):
cdef table_view c_view = table_view_from_columns(columns)
cdef pair[device_buffer, size_type] c_result
cdef unique_ptr[device_buffer] up_db
with nogil:
c_result = move(cpp_bitmask_or(c_view))
up_db = move(make_unique[device_buffer](move(c_result.first)))
dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
buf = as_buffer(dbuf)
return buf, c_result.second
def bitmask_or(list columns):
rmm_db, other = pylibcudf.null_mask.bitmask_or(
[col.to_pylibcudf(mode="read") for col in columns]
)
buf = as_buffer(rmm_db)
return buf, other
1 change: 1 addition & 0 deletions python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ set(cython_sources
join.pyx
lists.pyx
merge.pyx
null_mask.pyx
quantiles.pyx
reduce.pyx
replace.pyx
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from . cimport (
join,
lists,
merge,
null_mask,
quantiles,
reduce,
replace,
Expand Down Expand Up @@ -51,6 +52,7 @@ __all__ = [
"join",
"lists",
"merge",
"null_mask",
"quantiles",
"reduce",
"replace",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
join,
lists,
merge,
null_mask,
quantiles,
reduce,
replace,
Expand Down Expand Up @@ -51,6 +52,7 @@
"join",
"lists",
"merge",
"null_mask",
"quantiles",
"reduce",
"replace",
Expand Down
2 changes: 0 additions & 2 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/null_mask.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ from cudf._lib.pylibcudf.libcudf.types cimport (
size_type,
)

ctypedef int32_t underlying_type_t_mask_state


cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
cdef device_buffer copy_bitmask "cudf::copy_bitmask" (
Expand Down
18 changes: 18 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/null_mask.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from rmm._lib.device_buffer cimport DeviceBuffer

from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type

from .column cimport Column


# Copy ``col``'s validity bitmask into a DeviceBuffer.
cpdef DeviceBuffer copy_bitmask(Column col)

# Number of bytes to allocate for a mask representing ``number_of_bits`` bits.
cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits)

# ``= *`` marks ``state`` as optional; its default is given by the implementation.
cpdef DeviceBuffer create_null_mask(size_type size, mask_state state = *)

# Bitwise AND of the columns' bitmasks; yields (mask, count of unset bits).
cpdef tuple bitmask_and(list columns)

# Bitwise OR of the columns' bitmasks; yields (mask, count of unset bits).
cpdef tuple bitmask_or(list columns)
157 changes: 157 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/null_mask.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from libcpp.memory cimport make_unique, unique_ptr
from libcpp.pair cimport pair
from libcpp.utility cimport move

from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer

from cudf._lib.pylibcudf.libcudf cimport null_mask as cpp_null_mask
from cudf._lib.pylibcudf.libcudf.types cimport mask_state, size_type

from cudf._lib.pylibcudf.libcudf.types import \
mask_state as MaskState # no-cython-lint

from .column cimport Column
from .table cimport Table


cpdef DeviceBuffer copy_bitmask(Column col):
    """Copy the bitmask of ``col`` into a new ``DeviceBuffer``.

    For details, see :cpp:func:`copy_bitmask`.

    Parameters
    ----------
    col : Column
        The column whose bitmask is copied

    Returns
    -------
    rmm.DeviceBuffer
        A ``DeviceBuffer`` holding a copy of ``col``'s bitmask; the buffer is
        empty if ``col`` is not nullable
    """
    cdef device_buffer c_mask
    cdef unique_ptr[device_buffer] c_mask_owner

    with nogil:
        c_mask = move(cpp_null_mask.copy_bitmask(col.view()))
        # Re-home the buffer in a unique_ptr, the form DeviceBuffer accepts.
        c_mask_owner = move(make_unique[device_buffer](move(c_mask)))

    return DeviceBuffer.c_from_unique_ptr(move(c_mask_owner))


cpdef size_t bitmask_allocation_size_bytes(size_type number_of_bits):
    """Compute the bytes needed to represent a given number of bits.

    The result is sized with a 64B padding boundary.

    For details, see :cpp:func:`bitmask_allocation_size_bytes`.

    Parameters
    ----------
    number_of_bits : size_type
        How many bits the mask must be able to represent

    Returns
    -------
    size_t
        The number of bytes required
    """
    cdef size_t num_bytes

    with nogil:
        num_bytes = cpp_null_mask.bitmask_allocation_size_bytes(number_of_bits)

    return num_bytes
charlesbluca marked this conversation as resolved.
Show resolved Hide resolved


cpdef DeviceBuffer create_null_mask(
    size_type size,
    mask_state state = mask_state.UNINITIALIZED
):
    """Create a ``DeviceBuffer`` usable as the null-indicator bitmask of a
    ``Column``.

    For details, see :cpp:func:`create_null_mask`.

    Parameters
    ----------
    size : size_type
        Number of elements the mask has to cover
    state : mask_state, optional
        State the mask is created in. One of { MaskState.UNALLOCATED,
        MaskState.UNINITIALIZED, MaskState.ALL_VALID, MaskState.ALL_NULL }
        (default MaskState.UNINITIALIZED)

    Returns
    -------
    rmm.DeviceBuffer
        A ``DeviceBuffer`` suitable as a null bitmask with the requested size
        and state
    """
    cdef device_buffer c_mask
    cdef unique_ptr[device_buffer] c_mask_owner

    with nogil:
        c_mask = move(cpp_null_mask.create_null_mask(size, state))
        # Re-home the buffer in a unique_ptr, the form DeviceBuffer accepts.
        c_mask_owner = move(make_unique[device_buffer](move(c_mask)))

    return DeviceBuffer.c_from_unique_ptr(move(c_mask_owner))


cpdef tuple bitmask_and(list columns):
    """Compute the bitwise AND of the bitmasks of a list of columns.

    For details, see :cpp:func:`bitmask_and`.

    Parameters
    ----------
    columns : list
        The columns whose bitmasks are combined

    Returns
    -------
    tuple[DeviceBuffer, size_type]
        The resulting mask, followed by the count of unset bits
    """
    cdef Table tbl = Table(columns)
    cdef pair[device_buffer, size_type] c_pair
    cdef unique_ptr[device_buffer] c_mask_owner

    with nogil:
        c_pair = move(cpp_null_mask.bitmask_and(tbl.view()))
        # Only the buffer half is moved out; ``second`` is still read below.
        c_mask_owner = move(make_unique[device_buffer](move(c_pair.first)))

    mask = DeviceBuffer.c_from_unique_ptr(move(c_mask_owner))

    return mask, c_pair.second


cpdef tuple bitmask_or(list columns):
    """Compute the bitwise OR of the bitmasks of a list of columns.

    For details, see :cpp:func:`bitmask_or`.

    Parameters
    ----------
    columns : list
        The columns whose bitmasks are combined

    Returns
    -------
    tuple[DeviceBuffer, size_type]
        The resulting mask, followed by the count of unset bits
    """
    cdef Table tbl = Table(columns)
    cdef pair[device_buffer, size_type] c_pair
    cdef unique_ptr[device_buffer] c_mask_owner

    with nogil:
        c_pair = move(cpp_null_mask.bitmask_or(tbl.view()))
        # Only the buffer half is moved out; ``second`` is still read below.
        c_mask_owner = move(make_unique[device_buffer](move(c_pair.first)))

    mask = DeviceBuffer.c_from_unique_ptr(move(c_mask_owner))

    return mask, c_pair.second
charlesbluca marked this conversation as resolved.
Show resolved Hide resolved
Loading
Loading