diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp index 4039b6f5a772..33d9d6806a96 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp @@ -2330,6 +2330,217 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn, } } +/** + * @brief Count the non-zeros of a sub-block in a CRS matrix and find the first + * and last column indices at each row of the sub-block. This is a host function + * used by the kk_extract_diagonal_blocks_crsmatrix_sequential() + */ +template +void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( + const row_map_type &A_row_map, const entries_type &A_entries, + const ordinal_type &blk_row_start, const ordinal_type &blk_col_start, + const ordinal_type &blk_nrows, const ordinal_type &blk_ncols, + size_type &blk_nnz, offset_view1d_type &first_indices, + offset_view1d_type &last_indices) { + // Rowmap of i-th row-oriented sub-matrix + auto A_row_map_sub = Kokkos::subview( + A_row_map, + Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1)); + + blk_nnz = 0; + + for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row + size_type k1 = A_row_map_sub(j); + size_type k2 = A_row_map_sub(j + 1); + size_type k; + // Assume column indices are sorted in ascending order + // Find the position of the start column in the row + for (k = k1; k < k2; k++) { + ordinal_type col = A_entries(k); + if (col >= blk_col_start) { + break; + } + } + first_indices(j) = k; + // Find the position of the last column in the row + for (k = k2 - 1; k >= k1; k--) { + ordinal_type col = A_entries(k); + if (col < blk_col_start + blk_ncols) { + break; + } + } + last_indices(j) = k; + blk_nnz += (last_indices(j) - first_indices(j) + 1); + } +} + +/** + * @brief Extract a CRS sub-block from a CRS matrix + * This is a host function used by the + * kk_extract_diagonal_blocks_crsmatrix_sequential() + */ +template +void kk_extract_subblock_crsmatrix_sequential( + const entries_type &A_entries, const values_type &A_values, + const ordinal_type &blk_col_start, const ordinal_type &blk_nrows, + const size_type &blk_nnz, const offset_view1d_type &first_indices, + const offset_view1d_type &last_indices, out_row_map_type &blk_row_map, + out_entries_type &blk_entries, out_values_type &blk_values) { + // - create out_row_map + // - copy A_entries to out_entries and update out_entries with local column + // indices + // - copy A_values to out_values + size_type first_ = 0; + for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row + size_type nnz = last_indices(j) - first_indices(j) + 1; + blk_row_map(j) = first_; + for (size_type k = 0; k < nnz; k++) { + blk_entries(first_ + k) = A_entries(first_indices(j) + k) - blk_col_start; + blk_values(first_ + k) = A_values(first_indices(j) + k); + } + first_ += nnz; + } + blk_row_map(blk_nrows) = blk_nnz; // last element +} + +/** + * @brief Extract the diagonal blocks out of a crs matrix. + * This is a blocking function that runs on the host. + * + * @tparam crsMat_t The type of the CRS matrix. + * @param A [in] The square CrsMatrix. It is expected that column indices are + * in ascending order + * @param DiagBlk_v [out] The vector of the extracted the CRS diagonal blocks + * (1 <= the number of diagonal blocks <= A_nrows) + * + * Usage Example: + * kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b); + */ +template +void kk_extract_diagonal_blocks_crsmatrix_sequential( + const crsMat_t &A, std::vector &DiagBlk_v) { + using row_map_type = typename crsMat_t::row_map_type; + using entries_type = typename crsMat_t::index_type; + using values_type = typename crsMat_t::values_type; + using graph_t = typename crsMat_t::StaticCrsGraphType; + using out_row_map_type = typename graph_t::row_map_type::non_const_type; + using out_entries_type = typename graph_t::entries_type::non_const_type; + using out_values_type = typename crsMat_t::values_type::non_const_type; + using out_row_map_hostmirror_type = typename out_row_map_type::HostMirror; + using out_entries_hostmirror_type = typename out_entries_type::HostMirror; + using out_values_hostmirror_type = typename out_values_type::HostMirror; + + using ordinal_type = typename crsMat_t::non_const_ordinal_type; + using size_type = typename crsMat_t::non_const_size_type; + using offset_view1d_type = + Kokkos::View; + + row_map_type A_row_map = A.graph.row_map; + entries_type A_entries = A.graph.entries; + values_type A_values = A.values; + + auto A_row_map_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map); + auto A_entries_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries); + auto A_values_h = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values); + + ordinal_type A_nrows = static_cast(A.numRows()); + ordinal_type A_ncols = static_cast(A.numCols()); + ordinal_type n_blocks = static_cast(DiagBlk_v.size()); + + if (A_nrows != A_ncols) { + std::ostringstream os; + os << "The diagonal block extraction only works with square matrices -- " + "matrix A: " + << A_nrows << " x " << A_ncols; + throw std::runtime_error(os.str()); + } + + if (n_blocks == 1) { + // One block case: simply shallow copy A to DiagBlk_v[0] + DiagBlk_v[0] = crsMat_t(A); + } else { + // n_blocks > 1 + if (A_nrows == 0) { + // Degenerate case: A is an empty matrix + for (ordinal_type i = 0; i < n_blocks; i++) { + DiagBlk_v[i] = crsMat_t(); + } + } else { + // A_nrows >= 1 + if ((n_blocks < 1) || (A_nrows < n_blocks)) { + std::ostringstream os; + os << "The number of diagonal blocks (" << n_blocks + << ") should be >=1 and <= the number of rows of the matrix A (" + << A_nrows << ")"; + throw std::runtime_error(os.str()); + } + + ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0) + ? (A_nrows / n_blocks) + : (A_nrows / n_blocks + 1); + + std::vector row_map_v(n_blocks); + std::vector entries_v(n_blocks); + std::vector values_v(n_blocks); + std::vector row_map_h_v(n_blocks); + std::vector entries_h_v(n_blocks); + std::vector values_h_v(n_blocks); + + ordinal_type blk_row_start = 0; // first row index of i-th diagonal block + ordinal_type blk_col_start = 0; // first col index of i-th diagonal block + ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block + + for (ordinal_type i = 0; i < n_blocks; i++) { + blk_nrows = rows_per_block; + if ((blk_row_start + rows_per_block) > A_nrows) { + blk_nrows = A_nrows - blk_row_start; + } + blk_col_start = blk_row_start; + blk_ncols = blk_nrows; + + // First round: count i-th non-zeros or size of entries_v[i] and find + // the first and last column indices at each row + size_type blk_nnz = 0; + offset_view1d_type first("first", blk_nrows); // first position per row + offset_view1d_type last("last", blk_nrows); // last position per row + + kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( + A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, + blk_ncols, blk_nnz, first, last); + + // Second round: extract + row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1); + entries_v[i] = out_entries_type("entries_v", blk_nnz); + values_v[i] = out_values_type("values_v", blk_nnz); + row_map_h_v[i] = + out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1); + entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", blk_nnz); + values_h_v[i] = out_values_hostmirror_type("values_h_v", blk_nnz); + + kk_extract_subblock_crsmatrix_sequential( + A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, + last, row_map_h_v[i], entries_h_v[i], values_h_v[i]); + + Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); + Kokkos::deep_copy(entries_v[i], entries_h_v[i]); + Kokkos::deep_copy(values_v[i], values_h_v[i]); + + DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz, + values_v[i], row_map_v[i], entries_v[i]); + + blk_row_start += blk_nrows; + } // for (ordinal_type i = 0; i < n_blocks; i++) + } // A_nrows >= 1 + } // n_blocks > 1 +} + } // namespace Impl using Impl::isCrsGraphSorted; diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp index e0d0085be145..ab17ca34788c 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp @@ -46,6 +46,7 @@ #include "Test_Sparse_ccs2crs.hpp" #include "Test_Sparse_crs2ccs.hpp" #include "Test_Sparse_removeCrsMatrixZeros.hpp" +#include "Test_Sparse_extractCrsDiagonalBlocks.hpp" // TPL specific tests, these require // particular pairs of backend and TPL diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp new file mode 100644 index 000000000000..327780dec32d --- /dev/null +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -0,0 +1,154 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include "KokkosSparse_Utils.hpp" +#include "KokkosKernels_TestUtils.hpp" + +namespace Test { +template +void run_test_extract_diagonal_blocks(int nrows, int nblocks) { + using RowMapType = Kokkos::View; + using EntriesType = Kokkos::View; + using ValuesType = Kokkos::View; + using RowMapType_hm = typename RowMapType::HostMirror; + using EntriesType_hm = typename EntriesType::HostMirror; + using ValuesType_hm = typename ValuesType::HostMirror; + using crsMat_t = CrsMatrix; + + crsMat_t A; + std::vector DiagBlks(nblocks); + + if (nrows != 0) { + // Generate test matrix + const size_type nnz = 2 + (nrows - 2) * 3 + 2; + RowMapType_hm hrow_map("hrow_map", nrows + 1); + EntriesType_hm hentries("hentries", nnz); + ValuesType_hm hvalues("hvalues", nnz); + + // first row + hrow_map(0) = 0; + hentries(0) = 0; + hentries(1) = 1; + hvalues(0) = 0; + hvalues(1) = 1; + // rows in between + int cnt = 2; + for (int i = 1; i <= (nrows - 2); i++) { + hrow_map(i) = cnt; + hentries(cnt) = -1 + i; + hentries(cnt + 1) = 0 + i; + hentries(cnt + 2) = 1 + i; + hvalues(cnt) = -1 + i; + hvalues(cnt + 1) = 0 + i; + hvalues(cnt + 2) = 1 + i; + cnt += 3; + } + // last row + hrow_map(nrows - 1) = cnt; + hentries(nnz - 2) = nrows - 2; + hentries(nnz - 1) = nrows - 1; + hvalues(nnz - 2) = nrows - 2; + hvalues(nnz - 1) = nrows - 1; + // last element of row_map + hrow_map(nrows) = nnz; + + // Allocate A on device memory + RowMapType row_map("row_map", nrows + 1); + EntriesType entries("entries", nnz); + ValuesType values("values", nnz); + + // Copy from host to device + Kokkos::deep_copy(row_map, hrow_map); + Kokkos::deep_copy(entries, hentries); + Kokkos::deep_copy(values, hvalues); + + // Construct a CRS matrix + A = crsMat_t("CrsMatrix", nrows, nrows, nnz, values, row_map, entries); + } + + // Extract + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, + DiagBlks); + + // Checking + lno_t numRows = 0; + lno_t numCols = 0; + for (int i = 0; i < nblocks; i++) { + numRows += DiagBlks[i].numRows(); + numCols += DiagBlks[i].numCols(); + } + + EXPECT_TRUE(numRows == static_cast(nrows)); + EXPECT_TRUE(numCols == static_cast(nrows)); + + if (nrows > 0) { + bool flag = true; + lno_t col_start = 0; + for (int i = 0; i < nblocks; i++) { + RowMapType_hm hrow_map_diagblk("hrow_map_diagblk", + DiagBlks[i].numRows() + 1); + EntriesType_hm hentries_diagblk("hentries_diagblk", DiagBlks[i].nnz()); + ValuesType_hm hvalues_diagblk("hvalues_diagblk", DiagBlks[i].nnz()); + + Kokkos::deep_copy(hrow_map_diagblk, DiagBlks[i].graph.row_map); + Kokkos::deep_copy(hentries_diagblk, DiagBlks[i].graph.entries); + Kokkos::deep_copy(hvalues_diagblk, DiagBlks[i].values); + + for (int j = 0; j < static_cast(DiagBlks[i].numRows()); j++) { + size_type k1 = hrow_map_diagblk(j); + size_type k2 = hrow_map_diagblk(j + 1); + for (size_type k = k1; k < k2; k++) { + scalar_t col = static_cast(hentries_diagblk(k) + col_start); + scalar_t val = hvalues_diagblk(k); + if (Kokkos::abs(col - val) != 0) { + flag = false; + break; + } + } + if (flag == false) break; + } + if (flag == false) break; + col_start += DiagBlks[i].numCols(); + } + EXPECT_TRUE(flag); + } +} +} // namespace Test + +template +void test_extract_diagonal_blocks() { + for (int s = 1; s <= 8; s++) { + Test::run_test_extract_diagonal_blocks( + 0, s); + Test::run_test_extract_diagonal_blocks( + 12, s); + Test::run_test_extract_diagonal_blocks( + 123, s); + } +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F( \ + TestCategory, \ + sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_extract_diagonal_blocks(); \ + } + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST