rapidsai · rapids-bot · Jul 5, 2023 · Jun 26, 2023 · Jun 29, 2023 · Jun 29, 2023
@@ -0,0 +1,143 @@
+/*
+ * Copyright (c) 2023, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/string_view.hpp>
+#include <cudf/utilities/traits.hpp>
+
+#include <thrust/functional.h>
+
+#include <cmath>
+#include <limits>
+
+namespace cudf::detail {
+
+/// For order-by columns of signed types, bounds calculation might cause accidental
+/// overflow/underflows. This needs to be detected and handled appropriately
+/// for signed and unsigned types.
+
+/**
+ * @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types.
+ */
+template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
+__host__ __device__ T add_safe(T const& value, T const& delta)
+{
+  if constexpr (std::is_floating_point_v<T>) {
+    if (std::isinf(value) or std::isnan(value)) { return value; }
+  }
+  // delta >= 0.
+  return (value < 0 || (cuda::std::numeric_limits<T>::max() - value) >= delta)
+           ? (value + delta)
+           : cuda::std::numeric_limits<T>::max();
+}
+
+/**
+ * @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types.
+ */
+template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
+__host__ __device__ T add_safe(T const& value, T const& delta)
+{
+  // delta >= 0.
+  return ((cuda::std::numeric_limits<T>::max() - value) >= delta)
+           ? (value + delta)
+           : cuda::std::numeric_limits<T>::max();
+}
+
+/**
+ * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
+ *
+ * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
+ * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
+ */
+template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
+__host__ __device__ T subtract_safe(T const& value, T const& delta)
+{
+  if constexpr (std::is_floating_point_v<T>) {
+    if (std::isinf(value) or std::isnan(value)) { return value; }
+  }
+  // delta >= 0;
+  return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
+           ? (value - delta)
+           : cuda::std::numeric_limits<T>::lowest();
+}
+
+/**
+ * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
+ *
+ * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
+ * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
+ *
+ * This distinction isn't truly relevant for this overload (because float is signed).
+ * lowest() is kept for uniformity.
+ */
+template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
+__host__ __device__ T subtract_safe(T const& value, T const& delta)
+{
+  // delta >= 0;
+  return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
+           ? (value - delta)
+           : cuda::std::numeric_limits<T>::lowest();
+}
+
+/**
+ * @brief Comparator for numeric order-by columns, handling floating point NaN values.
+ *
+ * This is required for binary search through sorted vectors that contain NaN values.
+ * With ascending sort, NaN values are stored at the end of the sequence, even
+ * greater than infinity.
+ * But thrust::less would have trouble locating it because:
+ * 1. thrust::less(NaN, 10) returns false
+ * 2. thrust::less(10, NaN) also returns false
+ *
+ * This comparator honours the position of NaN values vis-à-vis non-NaN values.
+ *
+ */
+struct nan_aware_less {
+  template <typename T, CUDF_ENABLE_IF(not cudf::is_floating_point<T>())>
+  __host__ __device__ bool operator()(T const& lhs, T const& rhs) const
+  {
+    return thrust::less<T>{}(lhs, rhs);
+  }
+
+  template <typename T, CUDF_ENABLE_IF(cudf::is_floating_point<T>())>
+  __host__ __device__ bool operator()(T const& lhs, T const& rhs) const
+  {
+    if (std::isnan(lhs)) { return false; }
+    return std::isnan(rhs) ? true : thrust::less<T>{}(lhs, rhs);
+  }
+};
+
+/**
+ * @brief Comparator for numeric order-by columns, handling floating point NaN values. *
+ *
+ * This is required for binary search through sorted vectors that contain NaN values.
+ * With descending sort, NaN values are stored at the beginning of the sequence, even
+ * greater than infinity.
+ * But thrust::greater would have trouble locating it because:
+ * 1. thrust::greater(NaN, 10) returns false
+ * 2. thrust::greater(10, NaN) also returns false
+ *
+ * This comparator honours the position of NaN values vis-à-vis non-NaN values.
+ *
+ */
+struct nan_aware_greater {
+  template <typename T>
+  __host__ __device__ bool operator()(T const& lhs, T const& rhs) const
+  {
+    return nan_aware_less{}(rhs, lhs);
+  }
+};
+}  // namespace cudf::detail
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "detail/range_comparator_utils.cuh"
 #include "detail/range_window_bounds.hpp"
 #include "detail/rolling.cuh"
 #include "detail/rolling_jit.hpp"
@@ -218,67 +219,6 @@ std::unique_ptr<column> grouped_rolling_window(table_view const& group_keys,
 
 namespace {
 
-/// For order-by columns of signed types, bounds calculation might cause accidental
-/// overflow/underflows. This needs to be detected and handled appropriately
-/// for signed and unsigned types.
-
-/**
- * @brief Add `delta` to value, and cap at numeric_limits::max(), for signed types.
- */
-template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
-__device__ T add_safe(T const& value, T const& delta)
-{
-  // delta >= 0.
-  return (value < 0 || (cuda::std::numeric_limits<T>::max() - value) >= delta)
-           ? (value + delta)
-           : cuda::std::numeric_limits<T>::max();
-}
-
-/**
- * @brief Add `delta` to value, and cap at numeric_limits::max(), for unsigned types.
- */
-template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
-__device__ T add_safe(T const& value, T const& delta)
-{
-  // delta >= 0.
-  return ((cuda::std::numeric_limits<T>::max() - value) >= delta)
-           ? (value + delta)
-           : cuda::std::numeric_limits<T>::max();
-}
-
-/**
- * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for signed types.
- *
- * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
- * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
- */
-template <typename T, CUDF_ENABLE_IF(cuda::std::numeric_limits<T>::is_signed)>
-__device__ T subtract_safe(T const& value, T const& delta)
-{
-  // delta >= 0;
-  return (value >= 0 || (value - cuda::std::numeric_limits<T>::lowest()) >= delta)
-           ? (value - delta)
-           : cuda::std::numeric_limits<T>::lowest();
-}
-
-/**
- * @brief Subtract `delta` from value, and cap at numeric_limits::lowest(), for unsigned types.
- *
- * Note: We use numeric_limits::lowest() instead of min() because for floats, lowest() returns
- * the smallest finite value, as opposed to min() which returns the smallest _positive_ value.
- *
- * This distinction isn't truly relevant for this overload (because float is signed).
- * lowest() is kept for uniformity.
- */
-template <typename T, CUDF_ENABLE_IF(not cuda::std::numeric_limits<T>::is_signed)>
-__device__ T subtract_safe(T const& value, T const& delta)
-{
-  // delta >= 0;
-  return ((value - cuda::std::numeric_limits<T>::lowest()) >= delta)
-           ? (value - delta)
-           : cuda::std::numeric_limits<T>::lowest();
-}
-
 /**
  * @brief For a specified idx, find the lowest value of the (sorted) orderby column that
  * participates in a range-window query.
@@ -291,7 +231,7 @@ __device__ ElementT compute_lowest_in_window(ElementIter orderby_iter,
   if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
     return orderby_iter[idx];
   } else {
-    return subtract_safe(orderby_iter[idx], delta);
+    return cudf::detail::subtract_safe(orderby_iter[idx], delta);
   }
 }
 
@@ -307,7 +247,7 @@ __device__ ElementT compute_highest_in_window(ElementIter orderby_iter,
   if constexpr (std::is_same_v<ElementT, cudf::string_view>) {
     return orderby_iter[idx];
   } else {
-    return add_safe(orderby_iter[idx], delta);
+    return cudf::detail::add_safe(orderby_iter[idx], delta);
   }
 }
 
@@ -436,7 +376,8 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
     return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
                                                     d_orderby + group_start,
                                                     d_orderby + idx,
-                                                    lowest_in_window)) +
+                                                    lowest_in_window,
+                                                    cudf::detail::nan_aware_less{})) +
            1;  // Add 1, for `preceding` to account for current row.
   };
 
@@ -466,8 +407,11 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
     auto const group_end         = nulls_begin_idx == 0 ? num_rows : nulls_begin_idx;
     auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window);
 
-    return (thrust::upper_bound(
-              thrust::seq, d_orderby + idx, d_orderby + group_end, highest_in_window) -
+    return (thrust::upper_bound(thrust::seq,
+                                d_orderby + idx,
+                                d_orderby + group_end,
+                                highest_in_window,
+                                cudf::detail::nan_aware_less{}) -
             (d_orderby + idx)) -
            1;
   };
@@ -612,7 +556,8 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
     return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
                                                     d_orderby + search_start,
                                                     d_orderby + idx,
-                                                    lowest_in_window)) +
+                                                    lowest_in_window,
+                                                    cudf::detail::nan_aware_less{})) +
            1;  // Add 1, for `preceding` to account for current row.
   };
 
@@ -653,8 +598,11 @@ std::unique_ptr<column> range_window_ASC(column_view const& input,
     auto const search_end        = nulls_begin == group_start ? group_end : nulls_begin;
     auto const highest_in_window = compute_highest_in_window(d_orderby, idx, following_window);
 
-    return (thrust::upper_bound(
-              thrust::seq, d_orderby + idx, d_orderby + search_end, highest_in_window) -
+    return (thrust::upper_bound(thrust::seq,
+                                d_orderby + idx,
+                                d_orderby + search_end,
+                                highest_in_window,
+                                cudf::detail::nan_aware_less{}) -
             (d_orderby + idx)) -
            1;
   };
@@ -710,12 +658,11 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
     auto const group_start       = nulls_begin_idx == 0 ? nulls_end_idx : 0;
     auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window);
 
-    return ((d_orderby + idx) -
-            thrust::lower_bound(thrust::seq,
-                                d_orderby + group_start,
-                                d_orderby + idx,
-                                highest_in_window,
-                                thrust::greater<decltype(highest_in_window)>())) +
+    return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
+                                                    d_orderby + group_start,
+                                                    d_orderby + idx,
+                                                    highest_in_window,
+                                                    cudf::detail::nan_aware_greater{})) +
            1;  // Add 1, for `preceding` to account for current row.
   };
 
@@ -749,7 +696,7 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
                                 d_orderby + idx,
                                 d_orderby + group_end,
                                 lowest_in_window,
-                                thrust::greater<decltype(lowest_in_window)>()) -
+                                cudf::detail::nan_aware_greater{}) -
             (d_orderby + idx)) -
            1;
   };
@@ -810,12 +757,11 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
     auto const search_start      = nulls_begin == group_start ? nulls_end : group_start;
     auto const highest_in_window = compute_highest_in_window(d_orderby, idx, preceding_window);
 
-    return ((d_orderby + idx) -
-            thrust::lower_bound(thrust::seq,
-                                d_orderby + search_start,
-                                d_orderby + idx,
-                                highest_in_window,
-                                thrust::greater<decltype(highest_in_window)>())) +
+    return ((d_orderby + idx) - thrust::lower_bound(thrust::seq,
+                                                    d_orderby + search_start,
+                                                    d_orderby + idx,
+                                                    highest_in_window,
+                                                    cudf::detail::nan_aware_greater{})) +
            1;  // Add 1, for `preceding` to account for current row.
   };
 
@@ -857,7 +803,7 @@ std::unique_ptr<column> range_window_DESC(column_view const& input,
                                 d_orderby + idx,
                                 d_orderby + search_end,
                                 lowest_in_window,
-                                thrust::greater<decltype(lowest_in_window)>()) -
+                                cudf::detail::nan_aware_greater{}) -
             (d_orderby + idx)) -
            1;
   };

@@ -446,6 +446,7 @@ ConfigureTest(
   rolling/grouped_rolling_test.cpp
   rolling/lead_lag_test.cpp
   rolling/nth_element_test.cpp
+  rolling/range_comparator_test.cu
   rolling/range_rolling_window_test.cpp
   rolling/range_window_bounds_test.cpp
   rolling/rolling_test.cpp