Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into perf-minhash-highmem
Browse files Browse the repository at this point in the history
  • Loading branch information
davidwendt committed Oct 2, 2024
2 parents ef3b228 + 289e466 commit 1753a40
Show file tree
Hide file tree
Showing 30 changed files with 316 additions and 201 deletions.
6 changes: 5 additions & 1 deletion ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,15 @@ set -euo pipefail

package_dir="python/libcudf"

export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh ${package_dir}

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair -w ${package_dir}/final_dist ${package_dir}/dist/*
python -m auditwheel repair \
--exclude libnvcomp.so.4 \
-w ${package_dir}/final_dist \
${package_dir}/dist/*

RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 cpp ${package_dir}/final_dist
8 changes: 2 additions & 6 deletions cpp/cmake/thirdparty/get_nvcomp.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2021-2022, NVIDIA CORPORATION.
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand All @@ -16,11 +16,7 @@
function(find_and_configure_nvcomp)

include(${rapids-cmake-dir}/cpm/nvcomp.cmake)
rapids_cpm_nvcomp(
BUILD_EXPORT_SET cudf-exports
INSTALL_EXPORT_SET cudf-exports
USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP}
)
rapids_cpm_nvcomp(USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP})

# Per-thread default stream
if(TARGET nvcomp AND CUDF_USE_PER_THREAD_DEFAULT_STREAM)
Expand Down
28 changes: 27 additions & 1 deletion dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ files:
- depends_on_cupy
- depends_on_libkvikio
- depends_on_librmm
- depends_on_nvcomp
- depends_on_rmm
- develop
- docs
Expand Down Expand Up @@ -152,6 +153,13 @@ files:
- build_cpp
- depends_on_libkvikio
- depends_on_librmm
py_run_libcudf:
output: pyproject
pyproject_dir: python/libcudf
extras:
table: project
includes:
- depends_on_nvcomp
py_build_pylibcudf:
output: pyproject
pyproject_dir: python/pylibcudf
Expand Down Expand Up @@ -367,9 +375,27 @@ dependencies:
- fmt>=11.0.2,<12
- flatbuffers==24.3.25
- librdkafka>=2.5.0,<2.6.0a0
- spdlog>=1.14.1,<1.15
depends_on_nvcomp:
common:
- output_types: conda
packages:
# Align nvcomp version with rapids-cmake
- nvcomp==4.0.1
- spdlog>=1.14.1,<1.15
specific:
- output_types: [requirements, pyproject]
matrices:
- matrix:
cuda: "12.*"
packages:
- nvidia-nvcomp-cu12==4.0.1
- matrix:
cuda: "11.*"
packages:
- nvidia-nvcomp-cu11==4.0.1
- matrix:
packages:
- nvidia-nvcomp==4.0.1
rapids_build_skbuild:
common:
- output_types: [conda, requirements, pyproject]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,13 @@ strings
find
find_multiple
findall
padding
regex_flags
regex_program
repeat
replace
side_type
slice
split
strip
wrap
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=======
padding
=======

.. automodule:: pylibcudf.strings.padding
    :members:
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
=========
side_type
=========

.. automodule:: pylibcudf.strings.side_type
    :members:
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
====
wrap
====

.. automodule:: pylibcudf.strings.wrap
    :members:
9 changes: 1 addition & 8 deletions python/cudf/cudf/_lib/strings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,7 @@
from cudf._lib.strings.find_multiple import find_multiple
from cudf._lib.strings.findall import findall
from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object
from cudf._lib.strings.padding import (
SideType,
center,
ljust,
pad,
rjust,
zfill,
)
from cudf._lib.strings.padding import center, ljust, pad, rjust, zfill
from cudf._lib.strings.repeat import repeat_scalar, repeat_sequence
from cudf._lib.strings.replace import (
insert,
Expand Down
112 changes: 16 additions & 96 deletions python/cudf/cudf/_lib/strings/padding.pyx
Original file line number Diff line number Diff line change
@@ -1,64 +1,31 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from libcpp.memory cimport unique_ptr
from libcpp.string cimport string
from libcpp.utility cimport move

from cudf.core.buffer import acquire_spill_lock

from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.column cimport Column

from enum import IntEnum

from pylibcudf.libcudf.column.column cimport column
from pylibcudf.libcudf.strings.padding cimport (
pad as cpp_pad,
zfill as cpp_zfill,
)
from pylibcudf.libcudf.strings.side_type cimport (
side_type,
underlying_type_t_side_type,
)


class SideType(IntEnum):
LEFT = <underlying_type_t_side_type> side_type.LEFT
RIGHT = <underlying_type_t_side_type> side_type.RIGHT
BOTH = <underlying_type_t_side_type> side_type.BOTH
import pylibcudf as plc


@acquire_spill_lock()
def pad(Column source_strings,
size_type width,
fill_char,
side=SideType.LEFT):
side=plc.strings.side_type.SideType.LEFT):
"""
Returns a Column by padding strings in `source_strings`
up to the given `width`. Direction of padding is to be specified by `side`.
The additional characters being filled can be changed by specifying
`fill_char`.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

cdef string f_char = <string>str(fill_char).encode()

cdef side_type pad_direction = <side_type>(
<underlying_type_t_side_type> side
plc_result = plc.strings.padding.pad(
source_strings.to_pylibcudf(mode="read"),
width,
side,
fill_char,
)

with nogil:
c_result = move(cpp_pad(
source_view,
width,
pad_direction,
f_char
))

return Column.from_unique_ptr(move(c_result))
return Column.from_pylibcudf(plc_result)


@acquire_spill_lock()
Expand All @@ -68,19 +35,13 @@ def zfill(Column source_strings,
Returns a Column by prepending strings in `source_strings`
with '0' characters up to the given `width`.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

with nogil:
c_result = move(cpp_zfill(
source_view,
width
))

return Column.from_unique_ptr(move(c_result))
plc_result = plc.strings.padding.zfill(
source_strings.to_pylibcudf(mode="read"),
width
)
return Column.from_pylibcudf(plc_result)


@acquire_spill_lock()
def center(Column source_strings,
size_type width,
fill_char):
Expand All @@ -89,65 +50,24 @@ def center(Column source_strings,
in `source_strings` with additional character, `fill_char`
up to the given `width`.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

cdef string f_char = <string>str(fill_char).encode()

with nogil:
c_result = move(cpp_pad(
source_view,
width,
side_type.BOTH,
f_char
))
return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.BOTH)

return Column.from_unique_ptr(move(c_result))


@acquire_spill_lock()
def ljust(Column source_strings,
size_type width,
fill_char):
"""
Returns a Column by filling right side of strings in `source_strings`
with additional character, `fill_char` up to the given `width`.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

cdef string f_char = <string>str(fill_char).encode()
return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.RIGHT)

with nogil:
c_result = move(cpp_pad(
source_view,
width,
side_type.RIGHT,
f_char
))

return Column.from_unique_ptr(move(c_result))


@acquire_spill_lock()
def rjust(Column source_strings,
size_type width,
fill_char):
"""
Returns a Column by filling left side of strings in `source_strings`
with additional character, `fill_char` up to the given `width`.
"""
cdef unique_ptr[column] c_result
cdef column_view source_view = source_strings.view()

cdef string f_char = <string>str(fill_char).encode()

with nogil:
c_result = move(cpp_pad(
source_view,
width,
side_type.LEFT,
f_char
))

return Column.from_unique_ptr(move(c_result))
return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.LEFT)
Loading

0 comments on commit 1753a40

Please sign in to comment.