Skip to content

Commit

Permalink
Adds the Logical Stack algorithm (NVIDIA#11078)
Browse files Browse the repository at this point in the history
**Description**
This PR adds the _Logical Stack_, an algorithm required by the JSON parser. The _Logical Stack_ takes a sequence of stack operations (i.e., `push(X)`, `pop()`, `read()`) as if they were to be applied to a regular `stack` data structure in the given order. For each operation within that sequence, the algorithm resolves the stack state and writes out the item that is on top of the stack before such operation is applied. As, for some operations, the stack may be empty, the algorithm uses a user-specified _sentinel_ symbol to represent the "empty-stack" (i.e., there is no item on top of the stack). 

**How the _Logical Stack_ is implemented is illustrated in this presentation:**
https://docs.google.com/presentation/d/16r-0SlQFd-7fH2R7I06tc_JqsAd_0GrTgh_q20sJ2ak/edit?usp=sharing

The only deviation from the algorithm presented in the slides is the optimisation of a sparse sequence of stack operations. That is, in case of the _JSON Parser_, we only pass symbols that actually push or pop (i.e., `{`, `[`, `}`, and `]`) along with the index at which that operation occurred. Symbols that follow a stack operation that pushes or pops are filled with the symbol that is inferred as top-of-stack symbol of such operation.

Results from intermediate processing steps can be dumped to `stdout` by setting:
```
export CUDA_DBG_DUMP=1
```

**For instance:**
```
//            0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20
d_input  = "  [  {  }  ,  n  u  l  l  ,  [  1  ,  2  ,  3  ]  ,  [  1  ]  ]  ";

// This is the sparse representation we feed into the logical stack
// The algorithm's contract is: positions not present in the sparse list read the current top-of-stack symbol
d_symbols  = "  [  {  }  [  ]  [  ]  ]  "
d_indexes  = "  0  1  2  9 15 17 19 20  "

// Function object used for classifying the kind of stack operation a symbol represents
struct ToStackOp {
  __host__ __device__ fst::stack_op_type operator()(
    char const& symbol) const
  {
    return symbol == '[' ? fst::stack_op_type::PUSH : symbol == ']' ? fst::stack_op_type::POP : fst::stack_op_type::READ;
  }
};

// The symbol that we'll put whenever there's nothing on the stack
auto empty_stack_symbol = '_';

// A symbol that represents a read operation (i.e., one that neither pushes nor pops)
auto read_symbol = 'x';

// Type sufficiently large to cover [-max_stack_level, max_stack_level]
using stack_level_t = int8_t;
fst::sparse_stack_op_to_top_of_stack<stack_level_t>(
                          d_symbols,
                          d_indexes,
                          ToStackOp{},
                          d_top_of_stack_out,
                          empty_stack_symbol,
                          read_symbol,
                          d_symbols.size(), // input size (num. items in sparse representation)
                          d_input.size(),   // output size (num. items in dense representation)
                          stream);

// The output represents the symbol that was on top of the stack prior to applying the stack operation
d_input             = "  [  {  }  ,  n  u  l  l  ,  [  1  ,  2  ,  3  ]  ,  [  1  ]  ]  "; // <<-- original input
d_top_of_stack_out  = "  _  [  {  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  [  "; // <<-- logical stack output
```

Authors:
  - Elias Stehle (https://github.com/elstehle)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Karthikeyan (https://github.com/karthikeyann)

URL: rapidsai/cudf#11078
  • Loading branch information
elstehle authored Jul 11, 2022
1 parent 073cbd8 commit 8c39130
Show file tree
Hide file tree
Showing 4 changed files with 858 additions and 0 deletions.
140 changes: 140 additions & 0 deletions cpp/include/cudf_test/print_utilities.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>

#include <thrust/iterator/transform_iterator.h>

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <type_traits>

namespace cudf::test::print {

// Tag value used to request hexadecimal formatting (see hex_t and the hex_t print_value overload)
constexpr int32_t hex_tag = 0;

// Pairs a value with a compile-time tag so that print_value overloads can be selected based on the
// tag rather than on the underlying value type alone
template <int32_t TagT, typename T>
struct TaggedType {
  // The wrapped value
  T v;
};

// A value of type T that shall be printed as a hexadecimal number
template <typename T>
using hex_t = TaggedType<hex_tag, T>;

/**
 * @brief Function object wrapping a built-in value into the tagged representation selected by the
 * template argument (e.g., to have values from an iterator returning uint32_t printed as hex)
 *
 * @tparam TaggedTypeT A TaggedType template specialisation
 */
template <typename TaggedTypeT>
struct ToTaggedType {
  template <typename InputT>
  CUDF_HOST_DEVICE TaggedTypeT operator()(InputT const& value) const
  {
    TaggedTypeT tagged{value};
    return tagged;
  }
};

/**
 * @brief Returns an iterator that causes the values from \p it to be printed as hex values.
 *
 * @tparam InItT A random-access input iterator type
 * @param it A random-access input iterator whose values shall be printed as hex
 * @return A transform iterator yielding the values of \p it wrapped in hex_t
 */
template <typename InItT>
auto hex(InItT it)
{
  using value_t = typename std::iterator_traits<InItT>::value_type;
  return thrust::make_transform_iterator(it, ToTaggedType<hex_t<value_t>>{});
}

/**
 * @brief Prints a signed integral value, right-aligned and padded to the given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The value to print
 */
template <typename T, CUDF_ENABLE_IF(std::is_integral_v<T>&& std::is_signed_v<T>)>
CUDF_HOST_DEVICE void print_value(int32_t width, T arg)
{
  // Widen to long long so that the format specifier matches for all signed integer widths;
  // passing, e.g., an int64_t to "%*d" would mismatch the promoted argument (undefined behavior)
  printf("%*lld", width, static_cast<long long>(arg));
}

/**
 * @brief Prints an unsigned integral value, right-aligned and padded to the given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The value to print
 */
template <typename T, CUDF_ENABLE_IF(std::is_integral_v<T>&& std::is_unsigned_v<T>)>
CUDF_HOST_DEVICE void print_value(int32_t width, T arg)
{
  // Use an unsigned conversion specifier: the original "%*d" is a signed specifier, which is
  // incorrect for unsigned arguments. Widening to unsigned long long also makes the specifier
  // match for all unsigned integer widths (e.g., uint64_t).
  printf("%*llu", width, static_cast<unsigned long long>(arg));
}

// Prints a single character, right-aligned and padded to the given width.
// `inline` is required here: unlike its templated siblings, this is a non-template function
// defined in a header, so without it each including translation unit would emit a strong
// definition (ODR violation / multiple-definition link error).
inline CUDF_HOST_DEVICE void print_value(int32_t width, char arg) { printf("%*c", width, arg); }

/**
 * @brief Prints a tagged value as an uppercase hexadecimal number, right-aligned and padded to the
 * given width.
 *
 * @param width The field width to pad the printed number to
 * @param arg The tagged value whose wrapped value is printed as hex
 */
template <typename T>
CUDF_HOST_DEVICE void print_value(int32_t width, hex_t<T> arg)
{
  // "%X" expects an unsigned int; first convert to the same-width unsigned type (preserving the
  // bit pattern for negative values), then widen to unsigned long long so the "ll" specifier
  // matches for all integer widths (e.g., a 64-bit T would mismatch plain "%*X")
  printf("%*llX",
         width,
         static_cast<unsigned long long>(static_cast<std::make_unsigned_t<T>>(arg.v)));
}

namespace detail {
// Base case of the recursive variadic expansion: prints the last value. The delimiter parameter
// is unused here, since no further value follows.
template <typename T>
CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg)
{
  print_value(width, arg);
}

// Prints the first value, follows it by the delimiter (unless the delimiter is '\0'), and recurses
// on the remaining values
template <typename T, typename... Ts>
CUDF_HOST_DEVICE void print_values(int32_t width, char delimiter, T arg, Ts... args)
{
  print_value(width, arg);
  if (delimiter) printf("%c", delimiter);
  print_values(width, delimiter, args...);
}

// Single-threaded kernel: for each index i in [0, count), prints one line containing the index
// followed by the i-th element of every given iterator, separated by the delimiter.
// NOTE(review): "%6lu" assumes std::size_t is unsigned long (true on 64-bit Linux builds);
// platforms where this does not hold would need "%zu" or a cast — confirm target platforms.
template <typename... Ts>
__global__ void print_array_kernel(std::size_t count, int32_t width, char delimiter, Ts... args)
{
  // Only one thread does the printing to keep the output ordered
  if (threadIdx.x == 0 && blockIdx.x == 0) {
    for (std::size_t i = 0; i < count; i++) {
      printf("%6lu: ", i);
      print_values(width, delimiter, args[i]...);
      printf("\n");
    }
  }
}
} // namespace detail

/**
 * @brief Prints \p count elements from each of the given device-accessible iterators.
 *
 * Printing is performed only if the environment variable `CUDA_DBG_DUMP` is set. The printing
 * kernel is dispatched asynchronously to \p stream; output appears on stdout.
 *
 * @param count The number of items to print from each device-accessible iterator
 * @param stream The cuda stream to which the printing kernel shall be dispatched
 * @param args List of iterators to be printed
 */
template <typename... Ts>
void print_array(std::size_t count, rmm::cuda_stream_view stream, Ts... args)
{
  // The width to pad printed numbers to
  constexpr int32_t width = 6;

  // Delimiter used for separating values from subsequent iterators
  constexpr char delimiter = ',';

  // TODO we want this to compile to nothing depending on a compiler flag, rather than a runtime
  // environment-variable check
  if (std::getenv("CUDA_DBG_DUMP") != nullptr) {
    detail::print_array_kernel<<<1, 1, 0, stream.value()>>>(count, width, delimiter, args...);
  }
}

} // namespace cudf::test::print
Loading

0 comments on commit 8c39130

Please sign in to comment.