diff --git a/CHANGELOG.md b/CHANGELOG.md index fb4e664e0f6..b650663d64e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,8 @@ - PR #6768 Add support for scatter() on list columns - PR #6796 Add create_metadata_file in dask_cudf - PR #6765 Cupy fallback for __array_function__ and __array_ufunc__ for cudf.Series +- PR #6817 Add support for scatter() on lists-of-struct columns +- PR #6805 Implement `cudf::detail::copy_if` for `decimal32` and `decimal64` ## Improvements diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh index 1de4461f703..ebee8ea5168 100644 --- a/cpp/include/cudf/lists/detail/scatter.cuh +++ b/cpp/include/cudf/lists/detail/scatter.cuh @@ -321,7 +321,8 @@ struct list_child_constructor { template struct is_supported_child_type { static const bool value = cudf::is_fixed_width() || std::is_same::value || - std::is_same::value; + std::is_same::value || + std::is_same::value; }; public: @@ -617,6 +618,100 @@ struct list_child_constructor { stream.value(), mr); } + + /** + * @brief (Recursively) constructs child columns that are structs. + */ + template + std::enable_if_t::value, std::unique_ptr> operator()( + rmm::device_uvector const& list_vector, + cudf::column_view const& list_offsets, + cudf::lists_column_view const& source_lists_column_view, + cudf::lists_column_view const& target_lists_column_view, + rmm::cuda_stream_view stream, + rmm::mr::device_memory_resource* mr) const + { + auto const source_column_device_view = + column_device_view::create(source_lists_column_view.parent(), stream); + auto const target_column_device_view = + column_device_view::create(target_lists_column_view.parent(), stream); + auto const source_lists = cudf::detail::lists_column_device_view(*source_column_device_view); + auto const target_lists = cudf::detail::lists_column_device_view(*target_column_device_view); + + auto const source_structs = source_lists_column_view.child(); + auto const target_structs = target_lists_column_view.child(); + + auto const num_child_rows = get_num_child_rows(list_offsets, stream); + + auto const num_struct_members = + std::distance(source_structs.child_begin(), source_structs.child_end()); + std::vector> child_columns; + child_columns.reserve(num_struct_members); + + auto project_member_as_list = [stream, mr](column_view const& structs_member, + cudf::size_type const& structs_list_num_rows, + column_view const& structs_list_offsets, + rmm::device_buffer const& structs_list_nullmask, + cudf::size_type const& structs_list_null_count) { + return cudf::make_lists_column(structs_list_num_rows, + std::make_unique(structs_list_offsets, stream, mr), + std::make_unique(structs_member, stream, mr), + structs_list_null_count, + rmm::device_buffer(structs_list_nullmask), + stream, + mr); + }; + + auto const iter_source_member_as_list = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [&](auto child_idx) { + return project_member_as_list( + source_structs.child(child_idx), + source_lists_column_view.size(), + source_lists_column_view.offsets(), + cudf::detail::copy_bitmask(source_lists_column_view.parent(), stream, mr), + source_lists_column_view.null_count()); + }); + + auto const iter_target_member_as_list = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), [&](auto child_idx) { + return project_member_as_list( + target_structs.child(child_idx), + target_lists_column_view.size(), + target_lists_column_view.offsets(), + cudf::detail::copy_bitmask(target_lists_column_view.parent(), stream, mr), + target_lists_column_view.null_count()); + }); + + std::transform( + iter_source_member_as_list, + iter_source_member_as_list + num_struct_members, + iter_target_member_as_list, + std::back_inserter(child_columns), + [&](auto source_struct_member_as_list, auto target_struct_member_as_list) { + return cudf::type_dispatcher( + source_struct_member_as_list->child(cudf::lists_column_view::child_column_index).type(), + list_child_constructor{}, + list_vector, + list_offsets, + cudf::lists_column_view(source_struct_member_as_list->view()), + cudf::lists_column_view(target_struct_member_as_list->view()), + stream, + mr); + }); + + auto child_null_mask = + source_lists_column_view.child().nullable() || target_lists_column_view.child().nullable() + ? construct_child_nullmask( + list_vector, list_offsets, source_lists, target_lists, num_child_rows, stream, mr) + : std::make_pair(rmm::device_buffer{}, 0); + + return cudf::make_structs_column(num_child_rows, + std::move(child_columns), + child_null_mask.second, + std::move(child_null_mask.first), + stream.value(), + mr); + } }; /** diff --git a/cpp/tests/copying/scatter_list_tests.cu b/cpp/tests/copying/scatter_list_tests.cu index 1d2691ed3a9..e8e11629628 100644 --- a/cpp/tests/copying/scatter_list_tests.cu +++ b/cpp/tests/copying/scatter_list_tests.cu @@ -466,3 +466,480 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfLists) make_counting_transform_iterator(0, [](auto i) { return i != 4; })}, ret->get_column(0)); } + +TYPED_TEST(TypedScatterListsTest, ListsOfStructs) +{ + using namespace cudf::test; + using T = TypeParam; + using offsets_column = fixed_width_column_wrapper; + using numerics_column = fixed_width_column_wrapper; + + // clang-format off + auto source_numerics = numerics_column{ + 9, 9, 9, 9, + 8, 8, 8 + }; + + auto source_strings = strings_column_wrapper{ + "nine", "nine", "nine", "nine", + "eight", "eight", "eight" + }; + // clang-format on + + auto source_structs = structs_column_wrapper{{source_numerics, source_strings}}; + + auto source_lists = + cudf::make_lists_column(2, offsets_column{0, 4, 7}.release(), source_structs.release(), 0, {}); + + // clang-format off + auto target_ints = numerics_column{ + 0, 0, + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5 + }; + + auto target_strings = strings_column_wrapper{ + "zero", "zero", + "one", "one", + "two", "two", + "three", "three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto target_structs = structs_column_wrapper{{target_ints, target_strings}}; + + auto target_lists = cudf::make_lists_column( + 6, offsets_column{0, 2, 4, 6, 8, 10, 12}.release(), target_structs.release(), 0, {}); + + auto scatter_map = offsets_column{2, 0}; + + auto scatter_result = cudf::scatter(cudf::table_view({source_lists->view()}), + scatter_map, + cudf::table_view({target_lists->view()})); + + // clang-format off + auto expected_numerics = numerics_column{ + 8, 8, 8, + 1, 1, + 9, 9, 9, 9, + 3, 3, 4, 4, 5, 5 + }; + + auto expected_strings = strings_column_wrapper{ + "eight", "eight", "eight", + "one", "one", + "nine", "nine", "nine", "nine", + "three", "three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto expected_structs = structs_column_wrapper{{expected_numerics, expected_strings}}; + + auto expected_lists = cudf::make_lists_column( + 6, offsets_column{0, 3, 5, 9, 11, 13, 15}.release(), expected_structs.release(), 0, {}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); +} + +TYPED_TEST(TypedScatterListsTest, ListsOfStructsWithNullMembers) +{ + using namespace cudf::test; + using T = TypeParam; + using offsets_column = fixed_width_column_wrapper; + using numerics_column = fixed_width_column_wrapper; + + // clang-format off + auto source_numerics = numerics_column{ + { + 9, 9, 9, 9, + 8, 8, 8 + }, + make_counting_transform_iterator(0, [](auto i) { return i != 3; }) + }; + + auto source_strings = strings_column_wrapper{ + { + "nine", "nine", "nine", "nine", + "eight", "eight", "eight" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 5; }) + }; + // clang-format on + + auto source_structs = structs_column_wrapper{{source_numerics, source_strings}}; + + auto source_lists = + cudf::make_lists_column(2, offsets_column{0, 4, 7}.release(), source_structs.release(), 0, {}); + + // clang-format off + auto target_ints = numerics_column{ + 0, 0, + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5 + }; + + auto target_strings = strings_column_wrapper{ + "zero", "zero", + "one", "one", + "two", "two", + "three","three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto target_structs = structs_column_wrapper{{target_ints, target_strings}}; + + auto target_lists = cudf::make_lists_column( + 6, offsets_column{0, 2, 4, 6, 8, 10, 12}.release(), target_structs.release(), 0, {}); + // clang-format on + + auto scatter_map = offsets_column{2, 0}; + + auto scatter_result = cudf::scatter(cudf::table_view({source_lists->view()}), + scatter_map, + cudf::table_view({target_lists->view()})); + + // clang-format off + auto expected_numerics = numerics_column{ + { + 8, 8, 8, + 1, 1, + 9, 9, 9, 9, + 3, 3, + 4, 4, + 5, 5 + }, + make_counting_transform_iterator(0, [](auto i) { return i != 8; }) + }; + + auto expected_strings = strings_column_wrapper{ + { + "eight", "eight", "eight", + "one", "one", + "nine", "nine", "nine", "nine", + "three", "three", + "four", "four", + "five", "five" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 1; }) + }; + // clang-format on + + auto expected_structs = structs_column_wrapper{{expected_numerics, expected_strings}}; + + auto expected_lists = cudf::make_lists_column( + 6, offsets_column{0, 3, 5, 9, 11, 13, 15}.release(), expected_structs.release(), 0, {}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); +} + +TYPED_TEST(TypedScatterListsTest, ListsOfNullStructs) +{ + using namespace cudf::test; + using T = TypeParam; + using offsets_column = fixed_width_column_wrapper; + using numerics_column = fixed_width_column_wrapper; + + // clang-format off + auto source_numerics = numerics_column{ + { + 9, 9, 9, 9, + 8, 8, 8 + }, + make_counting_transform_iterator(0, [](auto i) { return i != 3; }) + }; + + auto source_strings = strings_column_wrapper{ + { + "nine", "nine", "nine", "nine", + "eight", "eight", "eight" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 5; }) + }; + // clang-format on + + auto source_structs = + structs_column_wrapper{{source_numerics, source_strings}, + make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + + auto source_lists = + cudf::make_lists_column(2, offsets_column{0, 4, 7}.release(), source_structs.release(), 0, {}); + + // clang-format off + auto target_ints = numerics_column{ + 0, 0, + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5 + }; + + auto target_strings = strings_column_wrapper{ + "zero", "zero", + "one", "one", + "two", "two", + "three", "three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto target_structs = structs_column_wrapper{{target_ints, target_strings}}; + + auto target_lists = cudf::make_lists_column( + 6, offsets_column{0, 2, 4, 6, 8, 10, 12}.release(), target_structs.release(), 0, {}); + + auto scatter_map = offsets_column{2, 0}; + + auto scatter_result = cudf::scatter(cudf::table_view({source_lists->view()}), + scatter_map, + cudf::table_view({target_lists->view()})); + + // clang-format off + auto expected_numerics = numerics_column{ + { + 8, 8, 8, + 1, 1, + 9, 9, 9, 9, + 3, 3, + 4, 4, + 5, 5 + }, + make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); }) + }; + + auto expected_strings = strings_column_wrapper{ + { + "eight", "eight", "eight", + "one", "one", + "nine", "nine", "nine", "nine", + "three", "three", + "four", "four", + "five", "five" + }, + make_counting_transform_iterator(0, [](auto i) { return (i != 1) && (i != 6); }) + }; + // clang-format on + + auto expected_structs = structs_column_wrapper{{expected_numerics, expected_strings}}; + + auto expected_lists = cudf::make_lists_column( + 6, offsets_column{0, 3, 5, 9, 11, 13, 15}.release(), expected_structs.release(), 0, {}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); +} + +TYPED_TEST(TypedScatterListsTest, EmptyListsOfStructs) +{ + using namespace cudf::test; + using T = TypeParam; + using offsets_column = fixed_width_column_wrapper; + using numerics_column = fixed_width_column_wrapper; + + // clang-format off + auto source_numerics = numerics_column{ + { + 9, 9, 9, 9, + 8, 8, 8 + }, + make_counting_transform_iterator(0, [](auto i) { return i != 3; }) + }; + + auto source_strings = strings_column_wrapper{ + { + "nine", "nine", "nine", "nine", + "eight", "eight", "eight" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 5; }) + }; + // clang-format on + + auto source_structs = + structs_column_wrapper{{source_numerics, source_strings}, + make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + + auto source_lists = cudf::make_lists_column( + 3, offsets_column{0, 4, 7, 7}.release(), source_structs.release(), 0, {}); + + // clang-format off + auto target_ints = numerics_column{ + 0, 0, + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5 + }; + + auto target_strings = strings_column_wrapper{ + "zero", "zero", + "one", "one", + "two", "two", + "three", "three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto target_structs = structs_column_wrapper{{target_ints, target_strings}}; + + auto target_lists = cudf::make_lists_column( + 6, offsets_column{0, 2, 4, 6, 8, 10, 12}.release(), target_structs.release(), 0, {}); + + auto scatter_map = offsets_column{2, 0, 4}; + + auto scatter_result = cudf::scatter(cudf::table_view({source_lists->view()}), + scatter_map, + cudf::table_view({target_lists->view()})); + + // clang-format off + auto expected_numerics = numerics_column{ + { + 8, 8, 8, + 1, 1, + 9, 9, 9, 9, + 3, 3, + 5, 5 + }, + make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); }) + }; + + auto expected_strings = strings_column_wrapper{ + { + "eight", "eight", "eight", + "one", "one", + "nine", "nine", "nine", "nine", + "three", "three", + "five", "five" + }, + make_counting_transform_iterator(0, [](auto i) { return (i != 1) && (i != 6); }) + }; + // clang-format on + + auto expected_structs = structs_column_wrapper{{expected_numerics, expected_strings}}; + + auto expected_lists = cudf::make_lists_column( + 6, offsets_column{0, 3, 5, 9, 11, 11, 13}.release(), expected_structs.release(), 0, {}); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); +} + +TYPED_TEST(TypedScatterListsTest, NullListsOfStructs) +{ + using namespace cudf::test; + using T = TypeParam; + using offsets_column = fixed_width_column_wrapper; + using numerics_column = fixed_width_column_wrapper; + + // clang-format off + auto source_numerics = numerics_column{ + { + 9, 9, 9, 9, + 8, 8, 8 + }, + make_counting_transform_iterator(0, [](auto i) { return i != 3; }) + }; + + auto source_strings = strings_column_wrapper{ + { + "nine", "nine", "nine", "nine", + "eight", "eight", "eight" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 5; }) + }; + // clang-format on + + auto source_structs = + structs_column_wrapper{{source_numerics, source_strings}, + make_counting_transform_iterator(0, [](auto i) { return i != 1; })}; + + auto source_list_null_mask_begin = + make_counting_transform_iterator(0, [](auto i) { return i != 2; }); + + auto source_lists = cudf::make_lists_column( + 3, + offsets_column{0, 4, 7, 7}.release(), + source_structs.release(), + 1, + detail::make_null_mask(source_list_null_mask_begin, source_list_null_mask_begin + 3)); + + // clang-format off + auto target_ints = numerics_column{ + 0, 0, + 1, 1, + 2, 2, + 3, 3, + 4, 4, + 5, 5 + }; + auto target_strings = strings_column_wrapper{ + "zero", "zero", + "one", "one", + "two", "two", + "three", "three", + "four", "four", + "five", "five" + }; + // clang-format on + + auto target_structs = structs_column_wrapper{{target_ints, target_strings}}; + + auto target_lists = cudf::make_lists_column( + 6, offsets_column{0, 2, 4, 6, 8, 10, 12}.release(), target_structs.release(), 0, {}); + + auto scatter_map = offsets_column{2, 0, 4}; + + auto scatter_result = cudf::scatter(cudf::table_view({source_lists->view()}), + scatter_map, + cudf::table_view({target_lists->view()})); + + // clang-format off + auto expected_numerics = numerics_column{ + { + 8, 8, 8, + 1, 1, + 9, 9, 9, 9, + 3, 3, + 5, 5 + }, + make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); }) + }; + + auto expected_strings = strings_column_wrapper{ + { + "eight", "eight", "eight", + "one", "one", + "nine", "nine", "nine", "nine", + "three", "three", + "five", "five" + }, + make_counting_transform_iterator(0, [](auto i) { return i != 1 && i != 6; }) + }; + // clang-format on + + auto expected_structs = structs_column_wrapper{{expected_numerics, expected_strings}}; + + auto expected_lists_null_mask_begin = + make_counting_transform_iterator(0, [](auto i) { return i != 4; }); + + auto expected_lists = cudf::make_lists_column( + 6, + offsets_column{0, 3, 5, 9, 11, 11, 13}.release(), + expected_structs.release(), + 1, + detail::make_null_mask(expected_lists_null_mask_begin, expected_lists_null_mask_begin + 6)); + + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_lists->view(), scatter_result->get_column(0)); +}