Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement interleave_columns for structs columns #9012

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
133 changes: 106 additions & 27 deletions cpp/src/reshape/interleave_columns.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,32 +29,111 @@
namespace cudf {
namespace detail {
namespace {
struct interleave_columns_functor {
template <typename T, typename... Args>
std::enable_if_t<not cudf::is_fixed_width<T>() and not std::is_same_v<T, cudf::string_view> and
not std::is_same_v<T, cudf::list_view>,
std::unique_ptr<cudf::column>>
operator()(Args&&...)
/**
 * @brief Primary template of `interleave_columns_impl`: the error case.
 *
 * Used when no other specialization is available for `T`. Its call operator accepts any
 * arguments, ignores them, and unconditionally fails through `CUDF_FAIL`.
 *
 * @tparam T      Element type being dispatched on.
 * @tparam Enable SFINAE hook used by the partial specializations; defaults to `void`.
 */
template <typename T, typename Enable = void>
struct interleave_columns_impl {
  template <typename... Args>
  std::unique_ptr<column> operator()(Args&&...)
  {
    // A single failure statement: anything after `CUDF_FAIL` would be unreachable.
    CUDF_FAIL("Unsupported type in `interleave_columns`.");
  }
};

struct interleave_columns_functor {
template <typename T>
std::enable_if_t<std::is_same_v<T, cudf::list_view>, std::unique_ptr<cudf::column>> operator()(
table_view const& lists_columns,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<cudf::column> operator()(table_view const& input,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return interleave_columns_impl<T>{}(input, create_mask, stream, mr);
}
};

template <typename T>
struct interleave_columns_impl<T, typename std::enable_if_t<std::is_same_v<T, cudf::list_view>>> {
std::unique_ptr<column> operator()(table_view const& lists_columns,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
return lists::detail::interleave_columns(lists_columns, create_mask, stream, mr);
}
};

template <typename T>
std::enable_if_t<std::is_same_v<T, cudf::string_view>, std::unique_ptr<cudf::column>> operator()(
table_view const& strings_columns,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
template <typename T>
struct interleave_columns_impl<T, typename std::enable_if_t<std::is_same_v<T, cudf::struct_view>>> {
std::unique_ptr<cudf::column> operator()(table_view const& structs_columns,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// We can safely call `column(0)` as the number of columns is known to be non zero.
auto const num_children = structs_columns.column(0).num_children();
CUDF_EXPECTS(
std::all_of(structs_columns.begin(),
structs_columns.end(),
[num_children](auto const& col) { return col.num_children() == num_children; }),
"Number of children of the input structs columns must be the same");

auto const num_columns = structs_columns.num_columns();
auto const num_rows = structs_columns.num_rows();
auto const output_size = num_columns * num_rows;

// Interleave the children of the structs columns.
std::vector<std::unique_ptr<cudf::column>> output_struct_members;
for (size_type child_idx = 0; child_idx < num_children; ++child_idx) {
// Collect children columns from the input structs columns at index `child_idx`.
auto const child_iter =
thrust::make_transform_iterator(structs_columns.begin(), [child_idx](auto const& col) {
return structs_column_view(col).get_sliced_child(child_idx);
});
auto children = std::vector<column_view>(child_iter, child_iter + num_columns);

auto const child_type = children.front().type();
CUDF_EXPECTS(
std::all_of(children.cbegin(),
children.cend(),
[child_type](auto const& col) { return child_type == col.type(); }),
"Children of the input structs columns at the same child index must have the same type");

auto const children_nullable = std::any_of(
children.cbegin(), children.cend(), [](auto const& col) { return col.nullable(); });
output_struct_members.emplace_back(
type_dispatcher<dispatch_storage_type>(child_type,
interleave_columns_functor{},
table_view{std::move(children)},
children_nullable,
stream,
mr));
}

auto const create_mask_fn = [&] {
auto const input_dv_ptr = table_device_view::create(structs_columns);
auto const validity_fn = [input_dv = *input_dv_ptr, num_columns] __device__(auto const idx) {
return input_dv.column(idx % num_columns).is_valid(idx / num_columns);
};
return cudf::detail::valid_if(thrust::make_counting_iterator<size_type>(0),
thrust::make_counting_iterator<size_type>(output_size),
validity_fn,
stream,
mr);
};

// Only create null mask if at least one input structs column is nullable.
auto [null_mask, null_count] =
create_mask ? create_mask_fn() : std::pair{rmm::device_buffer{0, stream, mr}, size_type{0}};
return make_structs_column(
output_size, std::move(output_struct_members), null_count, std::move(null_mask), stream, mr);
}
};

template <typename T>
struct interleave_columns_impl<T, typename std::enable_if_t<std::is_same_v<T, cudf::string_view>>> {
std::unique_ptr<cudf::column> operator()(table_view const& strings_columns,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto num_columns = strings_columns.num_columns();
if (num_columns == 1) // Single strings column returns a copy
Expand Down Expand Up @@ -106,7 +185,7 @@ struct interleave_columns_functor {
cudf::detail::get_value<int32_t>(offsets_column->view(), num_strings, stream);
auto chars_column = strings::detail::create_chars_child_column(bytes, stream, mr);
// Fill the chars column
auto d_results_chars = chars_column->mutable_view().data<char>();
auto d_results_chars = chars_column->mutable_view().template data<char>();
thrust::for_each_n(
rmm::exec_policy(stream),
thrust::make_counting_iterator<size_type>(0),
Expand All @@ -132,13 +211,14 @@ struct interleave_columns_functor {
stream,
mr);
}
};

template <typename T>
std::enable_if_t<cudf::is_fixed_width<T>(), std::unique_ptr<cudf::column>> operator()(
table_view const& input,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
template <typename T>
struct interleave_columns_impl<T, typename std::enable_if_t<cudf::is_fixed_width<T>()>> {
std::unique_ptr<cudf::column> operator()(table_view const& input,
bool create_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
auto arch_column = input.column(0);
auto output_size = input.num_columns() * input.num_rows();
Expand Down Expand Up @@ -193,11 +273,10 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
CUDF_EXPECTS(input.num_columns() > 0, "input must have at least one column to determine dtype.");

auto const dtype = input.column(0).type();

CUDF_EXPECTS(std::all_of(std::cbegin(input),
std::cend(input),
[dtype](auto const& col) { return dtype == col.type(); }),
"DTYPE mismatch");
"Input columns must have the same type");

auto const output_needs_mask = std::any_of(
std::cbegin(input), std::cend(input), [](auto const& col) { return col.nullable(); });
Expand Down
Loading