Skip to content

Commit

Permalink
Add string.convert.convert_integers APIs to pylibcudf
Browse files: browse the repository at this point in the history
  • Loading branch information
mroeschke committed Oct 3, 2024
1 parent 3faa3ee commit 52abc0e
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 71 deletions.
83 changes: 21 additions & 62 deletions python/cudf/cudf/_lib/string_casting.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,6 @@ from pylibcudf.libcudf.strings.convert.convert_floats cimport (
from_floats as cpp_from_floats,
to_floats as cpp_to_floats,
)
from pylibcudf.libcudf.strings.convert.convert_integers cimport (
from_integers as cpp_from_integers,
hex_to_integers as cpp_hex_to_integers,
integers_to_hex as cpp_integers_to_hex,
is_hex as cpp_is_hex,
to_integers as cpp_to_integers,
)
from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
integers_to_ipv4 as cpp_integers_to_ipv4,
ipv4_to_integers as cpp_ipv4_to_integers,
Expand Down Expand Up @@ -143,32 +136,18 @@ def stof(Column input_col):


def integer_to_string(Column input_col):
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_from_integers(
input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_integers.from_integers(
input_col.to_pylibcudf(mode="read"),
)
return Column.from_pylibcudf(plc_column)


def string_to_integer(Column input_col, object out_type):
cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
cdef type_id tid = <type_id> (
<underlying_type_t_type_id> (
SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[out_type]
)
plc_column = plc.strings.convert.convert_integers.to_integers(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(out_type)
)
cdef data_type c_out_type = data_type(tid)
with nogil:
c_result = move(
cpp_to_integers(
input_column_view,
c_out_type))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(plc_column)


def i8tos(Column input_col):
Expand Down Expand Up @@ -696,7 +675,7 @@ def is_ipv4(Column source_strings):
return Column.from_unique_ptr(move(c_result))


def htoi(Column input_col, **kwargs):
def htoi(Column input_col):
"""
Converting input column of type string having hex values
to integer of out_type
Expand All @@ -709,38 +688,22 @@ def htoi(Column input_col, **kwargs):
-------
A Column of integers parsed from hexadecimal string values.
"""

cdef column_view input_column_view = input_col.view()
cdef type_id tid = <type_id> (
<underlying_type_t_type_id> (
SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[cudf.dtype("int64")]
)
plc_column = plc.strings.convert.convert_integers.hex_to_integers(
input_col.to_pylibcudf(mode="read"),
dtype_to_pylibcudf_type(cudf.dtype("int64"))
)
cdef data_type c_out_type = data_type(tid)

cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_hex_to_integers(input_column_view,
c_out_type))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(plc_column)


def is_hex(Column source_strings):
"""
Returns a Column of boolean values with True for `source_strings`
that have hex characters.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_is_hex(
source_view
))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_integers.is_hex(
source_strings.to_pylibcudf(mode="read"),
)
return Column.from_pylibcudf(plc_column)


def itoh(Column input_col):
Expand All @@ -756,11 +719,7 @@ def itoh(Column input_col):
-------
A Column of strings with hexadecimal characters.
"""

cdef column_view input_column_view = input_col.view()
cdef unique_ptr[column] c_result
with nogil:
c_result = move(
cpp_integers_to_hex(input_column_view))

return Column.from_unique_ptr(move(c_result))
plc_column = plc.strings.convert.convert_integers.integers_to_hex(
input_col.to_pylibcudf(mode="read"),
)
return Column.from_pylibcudf(plc_column)
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,28 @@ from pylibcudf.libcudf.types cimport data_type
cdef extern from "cudf/strings/convert/convert_integers.hpp" namespace \
"cudf::strings" nogil:
cdef unique_ptr[column] to_integers(
column_view input_col,
column_view input,
data_type output_type) except +

cdef unique_ptr[column] from_integers(
column_view input_col) except +
column_view integers) except +

cdef unique_ptr[column] is_integer(
column_view source_strings
column_view input
) except +

cdef unique_ptr[column] is_integer(
column_view input,
data_type int_type
) except +

cdef unique_ptr[column] hex_to_integers(
column_view input_col,
column_view input,
data_type output_type) except +

cdef unique_ptr[column] is_hex(
column_view source_strings
column_view input
) except +

cdef unique_ptr[column] integers_to_hex(
column_view input_col) except +
column_view input) except +
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources convert_durations.pyx convert_datetime.pyx)
set(cython_sources convert_durations.pyx convert_datetime.pyx convert_integers.pyx)

set(linked_libraries cudf::cudf)
rapids_cython_create_modules(
Expand Down
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . cimport convert_datetime, convert_durations
from . cimport convert_datetime, convert_durations, convert_integers
2 changes: 1 addition & 1 deletion python/pylibcudf/pylibcudf/strings/convert/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
from . import convert_datetime, convert_durations
from . import convert_datetime, convert_durations, convert_integers
17 changes: 17 additions & 0 deletions python/pylibcudf/pylibcudf/strings/convert/convert_integers.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from pylibcudf.column cimport Column
from pylibcudf.types cimport DataType


# Cython declarations for the integer <-> string conversion functions of this
# module. Every function takes a pylibcudf Column and returns a Column.
# NOTE(review): the per-function summaries below are inferred from the
# function names and signatures only — confirm against the implementation.

# Presumably parses a strings column into integers of type `output_type`.
cpdef Column to_integers(Column input, DataType output_type)

# Presumably formats an integer column as a strings column.
cpdef Column from_integers(Column integers)

# `int_type=*` declares an optional argument: in a .pxd file the default value
# is not repeated here and is supplied by the implementation instead.
# Presumably reports, per row, whether the string is a valid integer
# (optionally one that fits `int_type`) — TODO confirm.
cpdef Column is_integer(Column input, DataType int_type=*)

# Presumably parses hexadecimal strings into integers of type `output_type`.
cpdef Column hex_to_integers(Column input, DataType output_type)

# Presumably reports, per row, whether the string is hexadecimal.
cpdef Column is_hex(Column input)

# Presumably formats an integer column as hexadecimal strings.
cpdef Column integers_to_hex(Column input)
Loading

0 comments on commit 52abc0e

Please sign in to comment.