Fix call to thrust::reduce_by_key in argmin/argmax libcudf groupby #9244

Closed
CHANGELOG.md (+4 -0)
@@ -1,3 +1,7 @@
# cuDF 21.12.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v21.12.00a for the latest changes to this development branch.

# cuDF 21.10.00 (Date TBD)

Please see https://github.com/rapidsai/cudf/releases/tag/v21.10.00a for the latest changes to this development branch.
conda/environments/cudf_dev_cuda11.0.yml (+1 -1)
@@ -10,7 +10,7 @@ dependencies:
- clang=11.0.0
- clang-tools=11.0.0
- cupy>7.1.0,<10.0.0a0
- rmm=21.10.*
- rmm=21.12.*
- cmake>=3.20.1
- cmake_setuptools>=0.1.3
- python>=3.7,<3.9
conda/environments/cudf_dev_cuda11.2.yml (+1 -1)
@@ -10,7 +10,7 @@ dependencies:
- clang=11.0.0
- clang-tools=11.0.0
- cupy>7.1.0,<10.0.0a0
- rmm=21.10.*
- rmm=21.12.*
- cmake>=3.20.1
- cmake_setuptools>=0.1.3
- python>=3.7,<3.9
cpp/CMakeLists.txt (+1 -1)
@@ -28,7 +28,7 @@ include(rapids-find)

rapids_cuda_init_architectures(CUDF)

project(CUDF VERSION 21.10.00 LANGUAGES C CXX CUDA)
project(CUDF VERSION 21.12.00 LANGUAGES C CXX CUDA)

# Needed because GoogleBenchmark changes the state of FindThreads.cmake,
# causing subsequent runs to have different values for the `Threads::Threads` target.
cpp/doxygen/Doxyfile (+2 -2)
@@ -38,7 +38,7 @@ PROJECT_NAME = "libcudf"
# could be handy for archiving the generated documentation or if some version
# control system is used.

PROJECT_NUMBER = 21.10.00
PROJECT_NUMBER = 21.12.00

# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a
@@ -2167,7 +2167,7 @@ SKIP_FUNCTION_MACROS = YES
# the path). If a tag file is not located in the directory in which doxygen is
# run, you must also specify the path to the tagfile here.

TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/21.10
TAGFILES = rmm.tag=https://docs.rapids.ai/api/librmm/21.12

# When a file name is specified after GENERATE_TAGFILE, doxygen will create a
# tag file that is based on the input files it reads. See section "Linking to
cpp/examples/basic/CMakeLists.txt (+1 -1)
@@ -6,7 +6,7 @@ set(CPM_DOWNLOAD_VERSION v0.32.2)
file(DOWNLOAD https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)
include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)

set(CUDF_TAG branch-21.10)
set(CUDF_TAG branch-21.12)
CPMFindPackage(NAME cudf
GIT_REPOSITORY https://github.com/rapidsai/cudf
GIT_TAG ${CUDF_TAG}
cpp/libcudf_kafka/CMakeLists.txt (+1 -1)
@@ -25,7 +25,7 @@ include(rapids-cuda)
include(rapids-export)
include(rapids-find)

project(CUDA_KAFKA VERSION 21.10.00 LANGUAGES CXX)
project(CUDA_KAFKA VERSION 21.12.00 LANGUAGES CXX)

# Set a default build type if none was specified
rapids_cmake_build_type(Release)
cpp/src/groupby/sort/group_single_pass_reduction_util.cuh (+30 -66)
@@ -31,77 +31,50 @@
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/transform_output_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>

namespace cudf {
namespace groupby {
namespace detail {

// ArgMin binary operator with tuple of (value, index)
/**
* @brief ArgMin binary operator with index values into input column.
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
template <typename T>
struct ArgMin {
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs,
thrust::tuple<T, size_type> const& rhs) const
{
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL)
return rhs;
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL)
return lhs;
else
return thrust::get<0>(lhs) < thrust::get<0>(rhs) ? lhs : rhs;
}
};

// ArgMax binary operator with tuple of (value, index)
template <typename T>
struct ArgMax {
CUDA_HOST_DEVICE_CALLABLE auto operator()(thrust::tuple<T, size_type> const& lhs,
thrust::tuple<T, size_type> const& rhs) const
{
if (thrust::get<1>(lhs) == cudf::detail::ARGMIN_SENTINEL)
return rhs;
else if (thrust::get<1>(rhs) == cudf::detail::ARGMIN_SENTINEL)
return lhs;
else
return thrust::get<0>(lhs) > thrust::get<0>(rhs) ? lhs : rhs;
}
};

struct get_tuple_second_element {
template <typename T>
__device__ size_type operator()(thrust::tuple<T, size_type> const& rhs) const
column_device_view const d_col;
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const
{
return thrust::get<1>(rhs);
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; }
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; }
return d_col.element<T>(lhs) < d_col.element<T>(rhs) ? lhs : rhs;
}
};

/**
* @brief Functor to store the boolean value to null mask.
* @brief ArgMax binary operator with index values into input column.
*
* @tparam T Type of the underlying column. Must support '<' operator.
*/
struct bool_to_nullmask {
mutable_column_device_view d_result;
__device__ void operator()(size_type i, bool rhs)
template <typename T>
struct ArgMax {
column_device_view const d_col;
CUDA_DEVICE_CALLABLE auto operator()(size_type const& lhs, size_type const& rhs) const
{
if (rhs) {
d_result.set_valid(i);
} else {
d_result.set_null(i);
}
// The extra bounds checking is due to issue github.com/rapidsai/cudf/9156 and
// github.com/NVIDIA/thrust/issues/1525
// where invalid random values may be passed here by thrust::reduce_by_key
if (lhs < 0 || lhs >= d_col.size() || d_col.is_null(lhs)) { return rhs; }
if (rhs < 0 || rhs >= d_col.size() || d_col.is_null(rhs)) { return lhs; }
return d_col.element<T>(rhs) < d_col.element<T>(lhs) ? lhs : rhs;
}
};

/**
* @brief Returns index for non-null element, and SENTINEL for null element in a column.
*
*/
struct null_as_sentinel {
column_device_view const col;
size_type const SENTINEL;
__device__ size_type operator()(size_type i) const { return col.is_null(i) ? SENTINEL : i; }
};

/**
* @brief Value accessor for column which supports dictionary column too.
*
@@ -191,25 +164,16 @@ struct reduce_functor {
auto resultview = mutable_column_device_view::create(result->mutable_view(), stream);
auto valuesview = column_device_view::create(values, stream);
if constexpr (K == aggregation::ARGMAX || K == aggregation::ARGMIN) {
constexpr auto SENTINEL =
(K == aggregation::ARGMAX ? cudf::detail::ARGMAX_SENTINEL : cudf::detail::ARGMIN_SENTINEL);
auto idx_begin =
cudf::detail::make_counting_transform_iterator(0, null_as_sentinel{*valuesview, SENTINEL});
// dictionary keys are sorted, so dictionary32 index comparison is enough.
auto column_begin = valuesview->begin<DeviceType>();
auto begin = thrust::make_zip_iterator(thrust::make_tuple(column_begin, idx_begin));
auto result_begin = thrust::make_transform_output_iterator(resultview->begin<ResultDType>(),
get_tuple_second_element{});
using OpType =
std::conditional_t<(K == aggregation::ARGMAX), ArgMax<DeviceType>, ArgMin<DeviceType>>;
thrust::reduce_by_key(rmm::exec_policy(stream),
group_labels.data(),
group_labels.data() + group_labels.size(),
begin,
thrust::make_counting_iterator<ResultType>(0),
thrust::make_discard_iterator(),
result_begin,
thrust::equal_to<size_type>{},
OpType{});
resultview->begin<ResultType>(),
thrust::equal_to<ResultType>{},
OpType{*valuesview});
} else {
auto init = OpType::template identity<DeviceType>();
auto begin = cudf::detail::make_counting_transform_iterator(
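For readers following the change above, here is a minimal, standalone sketch (not libcudf code) of the pattern the fix adopts: row indices are fed to thrust::reduce_by_key as the reduction values via a counting iterator, and a comparator that indexes back into the column data keeps the row of the smaller element for each group. The bounds checks mirror the ones added to ArgMin/ArgMax, guarding against the invalid intermediate values reported in rapidsai/cudf#9156 and NVIDIA/thrust#1525. The name argmin_by_index, the raw-pointer view of the data, and the omission of null handling are all simplifications for this sketch.

// Minimal sketch: per-group argmin with thrust::reduce_by_key, using row
// indices as the reduction values. Compile with nvcc; this is an
// illustration of the approach, not the libcudf implementation.
#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/reduce.h>

#include <cstdio>
#include <vector>

// Index-based comparator: returns the row index of the smaller value.
// As in the fix above, out-of-range arguments are ignored because
// reduce_by_key may hand the operator invalid intermediate values.
struct argmin_by_index {
  int const* data;
  int size;
  __device__ int operator()(int lhs, int rhs) const
  {
    if (lhs < 0 || lhs >= size) { return rhs; }
    if (rhs < 0 || rhs >= size) { return lhs; }
    return data[lhs] < data[rhs] ? lhs : rhs;
  }
};

int main()
{
  // Values already ordered by group label, as in sort-based groupby.
  std::vector<int> h_values{7, 3, 5, 9, 2, 8};
  std::vector<int> h_labels{0, 0, 0, 1, 1, 1};
  thrust::device_vector<int> values(h_values.begin(), h_values.end());
  thrust::device_vector<int> labels(h_labels.begin(), h_labels.end());
  thrust::device_vector<int> argmins(2);  // one result row index per group

  thrust::reduce_by_key(thrust::device,
                        labels.begin(),
                        labels.end(),
                        thrust::make_counting_iterator<int>(0),  // row indices as values
                        thrust::make_discard_iterator(),         // group keys are not needed
                        argmins.begin(),
                        thrust::equal_to<int>{},
                        argmin_by_index{thrust::raw_pointer_cast(values.data()),
                                        static_cast<int>(values.size())});

  thrust::host_vector<int> result = argmins;
  for (std::size_t g = 0; g < result.size(); ++g)
    std::printf("group %zu -> row %d\n", g, result[g]);  // expected: row 1, row 4
  return 0;
}

Writing the winning row index directly into the output is what lets the new code drop the zip/transform-output iterator plumbing and the ARGMIN/ARGMAX sentinels used by the previous tuple-based operators.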
docs/cudf/source/conf.py (+2 -2)
@@ -80,9 +80,9 @@
# built documents.
#
# The short X.Y version.
version = '21.10'
version = '21.12'
# The full version, including alpha/beta/rc tags.
release = '21.10.00'
release = '21.12.00'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
java/src/main/native/CMakeLists.txt (+1 -1)
@@ -29,7 +29,7 @@ if(DEFINED GPU_ARCHS)
endif()
rapids_cuda_init_architectures(CUDF_JNI)

project(CUDF_JNI VERSION 21.10.00 LANGUAGES C CXX CUDA)
project(CUDF_JNI VERSION 21.12.00 LANGUAGES C CXX CUDA)

###################################################################################################
# - build options ---------------------------------------------------------------------------------