-
Notifications
You must be signed in to change notification settings - Fork 887
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement replace in pylibcudf (#15005)
Contributes to #13921 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: #15005
- Loading branch information
Showing
11 changed files
with
304 additions
and
117 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ This page provides API documentation for pylibcudf. | |
reduce | ||
rolling | ||
scalar | ||
replace | ||
table | ||
types | ||
unary |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
======= | ||
replace | ||
======= | ||
|
||
.. automodule:: cudf._lib.pylibcudf.replace | ||
:members: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
|
||
from libcpp cimport bool | ||
|
||
from cudf._lib.cpp.replace cimport replace_policy | ||
|
||
from .column cimport Column | ||
from .scalar cimport Scalar | ||
|
||
# Fused type listing every kind of value that replace_nulls accepts as its | ||
# `replacement` argument; one specialization is generated per member. | ||
ctypedef fused ReplacementType: | ||
Column | ||
Scalar | ||
replace_policy | ||
# Allowing object is a workaround for | ||
# https://github.com/cython/cython/issues/5984. See the implementation of | ||
# replace_nulls for details. | ||
object | ||
|
||
|
||
# Declarations of the functions implemented in the matching .pyx file. | ||
# `replacement` may be any member of the ReplacementType fused type. | ||
cpdef Column replace_nulls(Column source_column, ReplacementType replacement) | ||
|
||
cpdef Column find_and_replace_all( | ||
Column source_column, | ||
Column values_to_replace, | ||
Column replacement_values, | ||
) | ||
|
||
# `=*` marks an optional argument; its default value is given by the | ||
# implementation (None for both replacement scalars). | ||
cpdef Column clamp( | ||
Column source_column, | ||
Scalar lo, | ||
Scalar hi, | ||
Scalar lo_replace=*, | ||
Scalar hi_replace=*, | ||
) | ||
|
||
# `inplace` is optional; the implementation defaults it to False. | ||
cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=*) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
# Copyright (c) 2023-2024, NVIDIA CORPORATION. | ||
|
||
|
||
from cython.operator import dereference | ||
|
||
from libcpp cimport bool | ||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
|
||
from cudf._lib.cpp cimport replace as cpp_replace | ||
from cudf._lib.cpp.column.column cimport column | ||
|
||
from cudf._lib.cpp.replace import \ | ||
replace_policy as ReplacePolicy # no-cython-lint | ||
|
||
from .column cimport Column | ||
from .scalar cimport Scalar | ||
|
||
|
||
cpdef Column replace_nulls(Column source_column, ReplacementType replacement):
    """Replace nulls in source_column.

    The values used to replace nulls depend on the type of replacement:
        - If replacement is a Column, the corresponding value from replacement
          is used.
        - If replacement is a Scalar, the same value is used for all nulls.
        - If replacement is a replace_policy, the policy is used to determine
          the replacement value:

            - PRECEDING: The first non-null value that precedes the null is
              used.
            - FOLLOWING: The first non-null value that follows the null is
              used.

    For more details, see :cpp:func:`replace_nulls`.

    Parameters
    ----------
    source_column : Column
        The column in which to replace nulls.
    replacement : Union[Column, Scalar, replace_policy]
        If a Column, the values to use as replacements. If a Scalar, the value
        to use as a replacement. If a replace_policy, the policy to use to
        determine the replacement value.

    Returns
    -------
    Column
        A copy of source_column with nulls replaced as determined by
        replacement.

    Raises
    ------
    TypeError
        If replacement is dispatched as a generic Python object and is not a
        replace_policy value.
    """
    cdef unique_ptr[column] c_result
    cdef replace_policy policy
    # Due to https://github.com/cython/cython/issues/5984, if this function is
    # called as a Python function (i.e. without typed inputs, which is always
    # true in pure Python files), the type of `replacement` will be `object`
    # instead of `replace_policy`. This is a workaround to handle that case.
    if ReplacementType is object:
        if not isinstance(replacement, ReplacePolicy):
            raise TypeError(
                "replacement must be a Column, Scalar, or replace_policy"
            )
        # Convert the Python-level enum member to the C enum before dropping
        # the GIL; Python objects cannot be touched inside the nogil block.
        policy = replacement
        with nogil:
            c_result = move(
                cpp_replace.replace_nulls(source_column.view(), policy)
            )
        return Column.from_libcudf(move(c_result))

    # Each branch below is selected at compile time for the matching
    # specialization of the fused type.
    with nogil:
        if ReplacementType is Column:
            c_result = move(
                cpp_replace.replace_nulls(
                    source_column.view(), replacement.view()
                )
            )
        elif ReplacementType is Scalar:
            c_result = move(
                cpp_replace.replace_nulls(
                    source_column.view(), dereference(replacement.c_obj)
                )
            )
        elif ReplacementType is replace_policy:
            c_result = move(
                cpp_replace.replace_nulls(source_column.view(), replacement)
            )
        else:
            assert False, "Internal error. Please contact pylibcudf developers"
    return Column.from_libcudf(move(c_result))
|
||
|
||
cpdef Column find_and_replace_all(
    Column source_column,
    Column values_to_replace,
    Column replacement_values,
):
    """Swap every occurrence of values_to_replace for replacement_values.

    This is a thin wrapper; for the full semantics see
    :cpp:func:`find_and_replace_all`.

    Parameters
    ----------
    source_column : Column
        The column whose values are searched and replaced.
    values_to_replace : Column
        The values to look for in source_column.
    replacement_values : Column
        The values substituted for those in values_to_replace.

    Returns
    -------
    Column
        A copy of source_column in which all occurrences of
        values_to_replace have been replaced by replacement_values.
    """
    cdef unique_ptr[column] c_replaced

    # The libcudf call only needs C++ views, so it runs without the GIL.
    with nogil:
        c_replaced = move(
            cpp_replace.find_and_replace_all(
                source_column.view(),
                values_to_replace.view(),
                replacement_values.view(),
            )
        )

    return Column.from_libcudf(move(c_replaced))
|
||
|
||
cpdef Column clamp(
    Column source_column,
    Scalar lo,
    Scalar hi,
    Scalar lo_replace=None,
    Scalar hi_replace=None,
):
    """Clamp the values in source_column to the range [lo, hi].

    For details, see :cpp:func:`clamp`.

    Parameters
    ----------
    source_column : Column
        The column to clamp.
    lo : Scalar
        The lower bound of the clamp range.
    hi : Scalar
        The upper bound of the clamp range.
    lo_replace : Scalar, optional
        The value to use for elements that are less than lo. If not
        specified, the value of lo is used.
    hi_replace : Scalar, optional
        The value to use for elements that are greater than hi. If not
        specified, the value of hi is used.

    Returns
    -------
    Column
        A copy of source_column with values clamped to the range [lo, hi].

    Raises
    ------
    ValueError
        If exactly one of lo_replace and hi_replace is provided.
    """
    cdef unique_ptr[column] c_clamped
    cdef bint has_lo_replace = lo_replace is not None
    cdef bint has_hi_replace = hi_replace is not None

    # The replacement scalars only make sense as a pair.
    if has_lo_replace != has_hi_replace:
        raise ValueError(
            "lo_replace and hi_replace must be specified together"
        )

    with nogil:
        if has_lo_replace:
            # Explicit replacements: use the five-argument overload.
            c_clamped = move(
                cpp_replace.clamp(
                    source_column.view(),
                    dereference(lo.c_obj),
                    dereference(hi.c_obj),
                    dereference(lo_replace.c_obj),
                    dereference(hi_replace.c_obj),
                )
            )
        else:
            # No replacements supplied: use the three-argument overload.
            c_clamped = move(
                cpp_replace.clamp(
                    source_column.view(),
                    dereference(lo.c_obj),
                    dereference(hi.c_obj),
                )
            )

    return Column.from_libcudf(move(c_clamped))
|
||
|
||
cpdef Column normalize_nans_and_zeros(Column source_column, bool inplace=False):
    """Normalize NaNs and zeros in source_column.

    For details, see :cpp:func:`normalize_nans_and_zeros`.

    Parameters
    ----------
    source_column : Column
        The column to normalize.
    inplace : bool, optional
        If True, normalize source_column in place. If False (the default),
        return a new column with the normalized values.

    Returns
    -------
    Column or None
        If inplace is False, a new column holding the normalized values.
        If inplace is True, source_column is modified through its mutable
        view and None is returned.
    """
    cdef unique_ptr[column] c_result

    if inplace:
        # The in-place overload writes through the mutable view and produces
        # no new column, so there is nothing to wrap and return.
        with nogil:
            cpp_replace.normalize_nans_and_zeros(source_column.mutable_view())
        return None

    with nogil:
        c_result = move(
            cpp_replace.normalize_nans_and_zeros(source_column.view())
        )
    return Column.from_libcudf(move(c_result))
Oops, something went wrong.