Skip to content

Commit

Permalink
Adds the Logical Stack algorithm (NVIDIA#11078)
Browse files Browse the repository at this point in the history
**Description**
This PR adds the _Logical Stack_, an algorithm required by the JSON parser. The _Logical Stack_ takes a sequence of stack operations (i.e., `push(X)`, `pop()`, `read()`) as if they were to be applied to a regular `stack` data structure in the given order. For each operation within that sequence, the algorithm resolves the stack state and writes out the item that is on top of the stack before such operation is applied. As, for some operations, the stack may be empty, the algorithm uses a user-specified _sentinel_ symbol to represent the "empty-stack" (i.e., there is no item on top of the stack). 

**How the _Logical Stack_ is implemented is illustrated in this presentation:**
https://docs.google.com/presentation/d/16r-0SlQFd-7fH2R7I06tc_JqsAd_0GrTgh_q20sJ2ak/edit?usp=sharing

The only deviation from the algorithm presented in the slides is the optimisation of a sparse sequence of stack operations. That is, in case of the _JSON Parser_, we only pass symbols that actually push or pop (i.e., `{`, `[`, `}`, and `]`) along with the index at which that operation occurred. Symbols that follow a stack operation that pushes or pops are filled with the symbol that is inferred as top-of-stack symbol of such operation.

Results from intermediate processing steps can be dumped to `stdout` by setting:
```
export CUDA_DBG_DUMP=1
```

**For instance:**
```
//            0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
d_input  = "  [  {  }  ,  n  u  l  l  ,  [  1  ,  2  ,  3  ]  ,  [  1  ]  ]  ";

// This is the sparse representation we feed into the logical stack
// The algorithm's contract is: positions not present in the sparse list read the current top-of-stack symbol
d_symbols  = "  [  {  }  [  ]  [  ]  ]  "
d_indexes  = "  0  1  2  9 15 17 19 20  "

// Function object used for classifying the kind of stack operation a symbol represents
struct ToStackOp {
  __host__ __device__ fst::stack_op_type operator()(
    char const& symbol) const
  {
    return symbol == '[' ? fst::stack_op_type::PUSH : symbol == ']' ? fst::stack_op_type::POP : fst::stack_op_type::READ;
  }
};

// The symbol that we'll put whenever there's nothing on the stack
auto empty_stack_symbol = '_';

// A symbol that represents a read operation (i.e., one that neither pushes nor pops)
auto read_symbol = 'x';

// Type sufficiently large to cover [-max_stack_level, max_stack_level]
using stack_level_t = int8_t;
fst::sparse_stack_op_to_top_of_stack<stack_level_t>(
                          d_symbols,
                          d_indexes,
                          ToStackOp{},
                          d_top_of_stack_out,
                          empty_stack_symbol,
                          read_symbol,
                          d_symbols.size(), // input size (num. items in sparse representation)
                          d_input.size(),   // output size (num. items in dense representation)
                          stream);

// The output represents the symbol that was on top of the stack prior to applying the stack operation
d_input             = "  [  {  }  ,  n  u  l  l  ,  [  1  ,  2  ,  3  ]  ,  [  1  ]  ]  "; // <<-- original input
d_top_of_stack_out  = "  _  [  {  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  "; // <<-- logical stack output
```

Authors:
  - Elias Stehle (https://github.com/elstehle)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Karthikeyan (https://github.com/karthikeyann)

URL: rapidsai/cudf#11078
  • Loading branch information
elstehle authored Jul 11, 2022
1 parent 073cbd8 commit 8c39130
Show file tree
Hide file tree
Showing 4 changed files with 858 additions and 0 deletions.
140 changes: 140 additions & 0 deletions cpp/include/cudf_test/print_utilities.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/transform_iterator.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <type_traits>

namespace cudf::test::print {

// Tag value used to request hexadecimal formatting (see hex_t and the hex_t print_value overload)
constexpr int32_t hex_tag = 0;

// Pairs a value with a compile-time tag so that print_value overloads can be selected based on the
// tag rather than on the underlying value type alone
template <int32_t TagT, typename T>
struct TaggedType {
  // The wrapped value
  T v;
};

// A value of type T that shall be printed as a hexadecimal number
template <typename T>
using hex_t = TaggedType<hex_tag, T>;

/**
 * @brief Function object wrapping a built-in value into the tagged representation selected by the
 * template argument (e.g., to have values from an iterator returning uint32_t printed as hex)
 *
 * @tparam TaggedTypeT A TaggedType template specialisation
 */
template <typename TaggedTypeT>
struct ToTaggedType {
  template <typename InputT>
  CUDF_HOST_DEVICE TaggedTypeT operator()(InputT const& value) const
  {
    TaggedTypeT tagged{value};
    return tagged;
  }
};

/**
 * @brief Returns an iterator that causes the values from \p it to be printed as hex values.
 *
 * @tparam InItT A random-access input iterator type
 * @param it A random-access input iterator whose values shall be printed as hex
 * @return A transform iterator yielding the values of \p it wrapped in hex_t
 */
template <typename InItT>
auto hex(InItT it)
{
  using value_t = typename std::iterator_traits<InItT>::value_type;
  return thrust::make_transform_iterator(it, ToTaggedType<hex_t<value_t>>{});
}

/**
 * @brief Prints a signed integral value, right-aligned and padded to the given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The value to print
 */
template <typename T, CUDF_ENABLE_IF(std::is_integral_v<T>&& std::is_signed_v<T>)>
CUDF_HOST_DEVICE void print_value(int32_t width, T arg)
{
  // Widen to long long so that the format specifier matches for all signed integer widths;
  // passing, e.g., an int64_t to "%*d" would mismatch the promoted argument (undefined behavior)
  printf("%*lld", width, static_cast<long long>(arg));
}

/**
 * @brief Prints an unsigned integral value, right-aligned and padded to the given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The value to print
 */
template <typename T, CUDF_ENABLE_IF(std::is_integral_v<T>&& std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE void print_value(int32_t width, T arg)
{
  // Use an unsigned conversion specifier: the original "%*d" is a signed specifier, which is
  // incorrect for unsigned arguments. Widening to unsigned long long also makes the specifier
  // match for all unsigned integer widths (e.g., uint64_t).
  printf("%*llu", width, static_cast<unsigned long long>(arg));
}

// Prints a single character, right-aligned and padded to the given width.
// `inline` is required here: unlike its templated siblings, this is a non-template function
// defined in a header, so without it each including translation unit would emit a strong
// definition (ODR violation / multiple-definition link error).
inline CUDF_HOST_DEVICE void print_value(int32_t width, char arg) { printf("%*c", width, arg); }

/**
 * @brief Prints a tagged value as an uppercase hexadecimal number, right-aligned and padded to the
 * given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The tagged value whose wrapped value is printed as hex
 */
template <typename T>
CUDF_HOST_DEVICE void print_value(int32_t width, hex_t<T> arg)
{
  // "%X" expects an unsigned int; first convert to the same-width unsigned type (preserving the
  // bit pattern for negative values), then widen to unsigned long long so the "ll" specifier
  // matches for all integer widths (e.g., a 64-bit T would mismatch plain "%*X")
  printf("%*llX",
         width,
         static_cast<unsigned long long>(static_cast<std::make_unsigned_t<T>>(arg.v)));
}

namespace detail {
// Base case of the recursive variadic expansion: prints the last value. The delimiter parameter
// is unused here, since no further value follows.
template <typename T>
CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg)
{
  print_value(width, arg);
}

// Prints the first value, follows it by the delimiter (unless the delimiter is '\0'), and recurses
// on the remaining values
template <typename T, typename... Ts>
CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg, Ts... args)
{
  print_value(width, arg);
  if (delimiter) printf("%c", delimiter);
  print_values(width, delimiter, args...);
}

// Single-threaded kernel: for each index i in [0, count), prints one line containing the index
// followed by the i-th element of every given iterator, separated by the delimiter.
// NOTE(review): "%6lu" assumes std::size_t is unsigned long (true on 64-bit Linux builds);
// platforms where this does not hold would need "%zu" or a cast — confirm target platforms.
template <typename... Ts>
__global__ void print_array_kernel(std::size_t count, int32_t width, char delimiter, Ts... args)
{
  // Only one thread does the printing to keep the output ordered
  if (threadIdx.x == 0 && blockIdx.x == 0) {
    for (std::size_t i = 0; i < count; i++) {
      printf("%6lu: ", i);
      print_values(width, delimiter, args[i]...);
      printf("\n");
    }
  }
}
} // namespace detail

/**
 * @brief Prints \p count elements from each of the given device-accessible iterators.
 *
 * Printing is performed only if the environment variable `CUDA_DBG_DUMP` is set. The printing
 * kernel is dispatched asynchronously to \p stream; output appears on stdout.
 *
 * @param count The number of items to print from each device-accessible iterator
 * @param stream The cuda stream to which the printing kernel shall be dispatched
 * @param args List of iterators to be printed
 */
template <typename... Ts>
void print_array(std::size_t count, rmm::cuda_stream_view stream, Ts... args)
{
  // The width to pad printed numbers to
  constexpr int32_t width = 6;

  // Delimiter used for separating values from subsequent iterators
  constexpr char delimiter = ',';

  // TODO we want this to compile to nothing depending on a compiler flag, rather than a runtime
  // environment-variable check
  if (std::getenv("CUDA_DBG_DUMP") != nullptr) {
    detail::print_array_kernel<<<1, 1, 0, stream.value()>>>(count, width, delimiter, args...);
  }
}

} // namespace cudf::test::print
Loading

0 comments on commit 8c39130

Please sign in to comment.