Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Haoyang Li <haoyangl@nvidia.com>
  • Loading branch information
thirtiseven committed May 6, 2024
1 parent 1f094be commit 28387a9
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 80 deletions.
81 changes: 3 additions & 78 deletions src/main/cpp/src/get_json_object.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,28 @@
*/

#include "get_json_object.hpp"
#include "json_parser.cuh"

#include <cudf/column/column_device_view.cuh>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/copy.hpp>
#include <cudf/detail/get_value.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/offsets_iterator_factory.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/json/json.hpp>
#include <cudf/lists/list_device_view.cuh>
#include <cudf/lists/lists_column_device_view.cuh>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/utilities.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/types.hpp>
#include <cudf/utilities/bit.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/optional.h>
#include <thrust/pair.h>
#include <thrust/scan.h>
#include <thrust/tuple.h>

namespace spark_rapids_jni {
Expand Down Expand Up @@ -833,31 +826,6 @@ rmm::device_uvector<path_instruction> construct_path_commands(
return cudf::detail::make_device_uvector_sync(path_commands, stream, mr);
}

__device__ thrust::pair<bool, size_t> get_json_object_size_single(
char const* input,
cudf::size_type input_len,
cudf::device_span<path_instruction const> path_commands)
{
json_parser j_parser(input, input_len);
j_parser.next_token();
// JSON validation check
if (json_token::ERROR == j_parser.get_current_token()) { return {false, 0}; }

json_generator generator(nullptr);

bool const success = evaluate_path(
j_parser, generator, write_style::RAW, {path_commands.data(), path_commands.size()});

if (!success) {
// generator may contain trash output, e.g.: generator writes some output,
// then JSON format is invalid, the previous output becomes trash.
// set output as zero to tell second step
generator.set_output_len_zero();
}

return {success, generator.get_output_len()};
}

/**
* @brief Parse a single json string using the provided command buffer
*
Expand Down Expand Up @@ -902,49 +870,6 @@ __device__ thrust::pair<bool, size_t> get_json_object_single(
return {success, generator.get_output_len()};
}

/**
* @brief Kernel for running the JSONPath query.
*
* This kernel operates in a 2-pass way. On the first pass it computes the
* output sizes. On the second pass, it fills in the provided output buffers
* (chars and validity).
*
* @param col Device view of the incoming string
* @param commands JSONPath command buffer
* @param output_offsets Buffer used to store the string offsets for the results
* of the query
* @param out_buf Buffer used to store the results of the query
* @param out_validity Output validity buffer
* @param out_valid_count Output count of # of valid bits
* @param options Options controlling behavior
*/
template <int block_size>
__launch_bounds__(block_size) CUDF_KERNEL
void get_json_object_size_kernel(cudf::column_device_view col,
cudf::device_span<path_instruction const> path_commands,
cudf::size_type* d_sizes,
cudf::detail::input_offsetalator output_offsets)
{
auto tid = cudf::detail::grid_1d::global_thread_id();

if (tid < col.size()) {
cudf::string_view const str = col.element<cudf::string_view>(tid);
if (str.size_bytes() > 0) {
// process one single row
auto [result, output_size] = get_json_object_size_single(
str.data(), str.size_bytes(), {path_commands.data(), path_commands.size()});

// filled in only during the precompute step. during the compute step, the
// offsets are fed back in so we do -not- want to write them out
d_sizes[tid] = static_cast<cudf::size_type>(output_size);
} else {
// valid JSON length is always greater than 0
// if `str` size len is zero, output len is 0 and `is_valid` is false
d_sizes[tid] = 0;
}
}
}

/**
* @brief Kernel for running the JSONPath query.
*
Expand Down Expand Up @@ -1048,9 +973,9 @@ std::unique_ptr<cudf::column> get_json_object(
cudf::detail::grid_1d const grid{input.size(), block_size};
auto d_input_ptr = cudf::column_device_view::create(input.parent(), stream);
// preprocess sizes (returned in the offsets buffer)
get_json_object_size_kernel<block_size>
get_json_object_kernel<block_size>
<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
*d_input_ptr, path_commands, sizes.data(), d_offsets);
*d_input_ptr, path_commands, sizes.data(), d_offsets, nullptr, nullptr, nullptr);

// convert sizes to offsets
auto [offsets, output_size] =
Expand Down
2 changes: 0 additions & 2 deletions src/main/cpp/src/get_json_object.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@

#pragma once

#include "json_parser.cuh"

#include <cudf/strings/string_view.hpp>
#include <cudf/strings/strings_column_view.hpp>

Expand Down

0 comments on commit 28387a9

Please sign in to comment.