Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix inf/NaN comparisons for FLOAT orderby in window functions #13635

Merged
merged 14 commits into from
Jul 5, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions cpp/src/rolling/detail/range_comparator_utils.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/strings/string_view.hpp>
#include <cudf/utilities/traits.hpp>

#include <thrust/functional.h>

#include <cmath>
#include <limits>

namespace cudf::detail {

/// For order-by columns of signed types, bounds calculation might cause accidental
/// overflow/underflows. This needs to be detected and handled appropriately
/// for signed and unsigned types.

/**
* @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types.
*/
template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
__host__ __device__ T add_safe(T const& value, T const& delta)
{
if constexpr (std::is_floating_point_v<T>) {
if (std::isinf(value) or std::isnan(value)) { return value; }
}
// delta >= 0.
return (value < 0 || (cuda::std::numeric_limits<T>::max() - value) >= delta)
? (value + delta)
: cuda::std::numeric_limits<T>::max();
}

/**
* @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types.
*/
template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
__host__ __device__ T add_safe(T const& value, T const& delta)
{
// delta >= 0.
return ((cuda::std::numeric_limits<T>::max() - value) >= delta)
? (value + delta)
: cuda::std::numeric_limits<T>::max();
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*/
template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
__host__ __device__ T subtract_safe(T const& value, T const& delta)
{
if constexpr (std::is_floating_point_v<T>) {
if (std::isinf(value) or std::isnan(value)) { return value; }
}
// delta >= 0;
return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::lowest();
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*
* This distinction isn't truly relevant for this overload (because float is signed).
* lowest() is kept for uniformity.
*/
template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
__host__ __device__ T subtract_safe(T const& value, T const& delta)
{
// delta >= 0;
return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::lowest();
}

/**
* @brief Comparator for numeric order-by columns, handling floating point NaN values.
*
* This is required for binary search through sorted vectors that contain NaN values.
* With ascending sort, NaN values are stored at the end of the sequence, even
* greater than infinity.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
* But thrust::less would have trouble locating it because:
* 1. thrust::less(NaN, 10) returns false
* 2. thrust::less(10, NaN) also returns false
*
* This comparator honours the position of NaN values vis-à-vis non-NaN values.
*
*/
struct nan_aware_less {
template <typename T, CUDF_ENABLE_IF(not cudf::is_floating_point<T>())>
__host__ __device__ bool operator()(T const& lhs, T const& rhs) const
{
return thrust::less<T>{}(lhs, rhs);
}

template <typename T, CUDF_ENABLE_IF(cudf::is_floating_point<T>())>
__host__ __device__ bool operator()(T const& lhs, T const& rhs) const
{
if (std::isnan(lhs)) { return false; }
return std::isnan(rhs) ? true : thrust::less<T>{}(lhs, rhs);
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
}
};

/**
* @brief Comparator for numeric order-by columns, handling floating point NaN values. *
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
*
* This is required for binary search through sorted vectors that contain NaN values.
* With descending sort, NaN values are stored at the beginning of the sequence, even
* greater than infinity.
* But thrust::greater would have trouble locating it because:
* 1. thrust::greater(NaN, 10) returns false
* 2. thrust::greater(10, NaN) also returns false
*
* This comparator honours the position of NaN values vis-à-vis non-NaN values.
mythrocks marked this conversation as resolved.
Show resolved Hide resolved
*
*/
struct nan_aware_greater {
template <typename T>
__host__ __device__ bool operator()(T const& lhs, T const& rhs) const
{
return nan_aware_less{}(rhs, lhs);
}
};
} // namespace cudf::detail
112 changes: 29 additions & 83 deletions cpp/src/rolling/grouped_rolling.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
* limitations under the License.
*/

#include "detail/range_comparator_utils.cuh"
#include "detail/range_window_bounds.hpp"
#include "detail/rolling.cuh"
#include "detail/rolling_jit.hpp"
Expand Down Expand Up @@ -218,67 +219,6 @@ std::unique_ptr<column> grouped_rolling_window(table_view const& group_keys,

namespace {

/// For order-by columns of signed types, bounds calculation might cause accidental
/// overflow/underflows. This needs to be detected and handled appropriately
/// for signed and unsigned types.

/**
* @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types.
*/
template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
__device__ T add_safe(T const& value, T const& delta)
{
// delta >= 0.
return (value < 0 || (cuda::std::numeric_limits<T>::max() - value) >= delta)
? (value + delta)
: cuda::std::numeric_limits<T>::max();
}

/**
* @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types.
*/
template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
__device__ T add_safe(T const& value, T const& delta)
{
// delta >= 0.
return ((cuda::std::numeric_limits<T>::max() - value) >= delta)
? (value + delta)
: cuda::std::numeric_limits<T>::max();
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*/
template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
__device__ T subtract_safe(T const& value, T const& delta)
{
// delta >= 0;
return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::lowest();
}

/**
* @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
*
* Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
* the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
*
* This distinction isn't truly relevant for this overload (because float is signed).
* lowest() is kept for uniformity.
*/
template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
__device__ T subtract_safe(T const& value, T const& delta)
{
// delta >= 0;
return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
? (value - delta)
: cuda::std::numeric_limits<T>::lowest();
}

/**
* @brief For a specified idx, find the lowest value of the (sorted) orderby column that
* participates in a range-window query.
Expand All @@ -291,7 +231,7 @@ __device__ ElementT compute_lowest_in_window(ElementIter orderby_iter,
if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
return orderby_iter[idx];
} else {
return subtract_safe(orderby_iter[idx], delta);
return cudf::detail::subtract_safe(orderby_iter[idx], delta);
}
}

Expand All @@ -307,7 +247,7 @@ __device__ ElementT compute_highest_in_window(ElementIter orderby_iter,
if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
return orderby_iter[idx];
} else {
return add_safe(orderby_iter[idx], delta);
return cudf::detail::add_safe(orderby_iter[idx], delta);
}
}

Expand Down Expand Up @@ -436,7 +376,8 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
d_orderby + group_start,
d_orderby + idx,
lowest_in_window)) +
lowest_in_window,
cudf::detail::nan_aware_less{})) +
1; // Add 1, for `preceding` to account for current row.
};

Expand Down Expand Up @@ -466,8 +407,11 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
auto const group_end = nulls_begin_idx == 0 ? num_rows : nulls_begin_idx;
auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window);

return (thrust::upper_bound(
thrust::seq, d_orderby + idx, d_orderby + group_end, highest_in_window) -
return (thrust::upper_bound(thrust::seq,
d_orderby + idx,
d_orderby + group_end,
highest_in_window,
cudf::detail::nan_aware_less{}) -
(d_orderby + idx)) -
1;
};
Expand Down Expand Up @@ -612,7 +556,8 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
d_orderby + search_start,
d_orderby + idx,
lowest_in_window)) +
lowest_in_window,
cudf::detail::nan_aware_less{})) +
1; // Add 1, for `preceding` to account for current row.
};

Expand Down Expand Up @@ -653,8 +598,11 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
auto const search_end = nulls_begin == group_start ? group_end : nulls_begin;
auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window);

return (thrust::upper_bound(
thrust::seq, d_orderby + idx, d_orderby + search_end, highest_in_window) -
return (thrust::upper_bound(thrust::seq,
d_orderby + idx,
d_orderby + search_end,
highest_in_window,
cudf::detail::nan_aware_less{}) -
(d_orderby + idx)) -
1;
};
Expand Down Expand Up @@ -710,12 +658,11 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
auto const group_start = nulls_begin_idx == 0 ? nulls_end_idx : 0;
auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window);

return ((d_orderby + idx) -
thrust::lower_bound(thrust::seq,
d_orderby + group_start,
d_orderby + idx,
highest_in_window,
thrust::greater<decltype(highest_in_window)>())) +
return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
d_orderby + group_start,
d_orderby + idx,
highest_in_window,
cudf::detail::nan_aware_greater{})) +
1; // Add 1, for `preceding` to account for current row.
};

Expand Down Expand Up @@ -749,7 +696,7 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
d_orderby + idx,
d_orderby + group_end,
lowest_in_window,
thrust::greater<decltype(lowest_in_window)>()) -
cudf::detail::nan_aware_greater{}) -
(d_orderby + idx)) -
1;
};
Expand Down Expand Up @@ -810,12 +757,11 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
auto const search_start = nulls_begin == group_start ? nulls_end : group_start;
auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window);

return ((d_orderby + idx) -
thrust::lower_bound(thrust::seq,
d_orderby + search_start,
d_orderby + idx,
highest_in_window,
thrust::greater<decltype(highest_in_window)>())) +
return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
d_orderby + search_start,
d_orderby + idx,
highest_in_window,
cudf::detail::nan_aware_greater{})) +
1; // Add 1, for `preceding` to account for current row.
};

Expand Down Expand Up @@ -857,7 +803,7 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
d_orderby + idx,
d_orderby + search_end,
lowest_in_window,
thrust::greater<decltype(lowest_in_window)>()) -
cudf::detail::nan_aware_greater{}) -
(d_orderby + idx)) -
1;
};
Expand Down
1 change: 1 addition & 0 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,7 @@ ConfigureTest(
rolling/grouped_rolling_test.cpp
rolling/lead_lag_test.cpp
rolling/nth_element_test.cpp
rolling/range_comparator_test.cu
rolling/range_rolling_window_test.cpp
rolling/range_window_bounds_test.cpp
rolling/rolling_test.cpp
Expand Down
Loading