Skip to content

Commit

Permalink
Merge Pull Request #12212 from vqd8a/Trilinos/kk-get-diagonal-blocks-…
Browse files Browse the repository at this point in the history
…crsmatrix

Automatically Merged using Trilinos Pull Request AutoTester
PR Title: b'KokkosKernels: extract diagonal blocks from a CRS matrix into separate CRS matrices'
PR Author: vqd8a
  • Loading branch information
trilinos-autotester committed Sep 6, 2023
2 parents cb96935 + b7feff3 commit 4b9ad2b
Show file tree
Hide file tree
Showing 3 changed files with 366 additions and 0 deletions.
211 changes: 211 additions & 0 deletions packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2330,6 +2330,217 @@ void validateCrsMatrix(int m, int n, const Rowmap &rowmapIn,
}
}

/**
* @brief Count the non-zeros of a sub-block in a CRS matrix and find the first
* and last column indices at each row of the sub-block. This is a host function
* used by the kk_extract_diagonal_blocks_crsmatrix_sequential()
*/
template <typename row_map_type, typename entries_type, typename ordinal_type,
typename size_type, typename offset_view1d_type>
void kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(
const row_map_type &A_row_map, const entries_type &A_entries,
const ordinal_type &blk_row_start, const ordinal_type &blk_col_start,
const ordinal_type &blk_nrows, const ordinal_type &blk_ncols,
size_type &blk_nnz, offset_view1d_type &first_indices,
offset_view1d_type &last_indices) {
// Rowmap of i-th row-oriented sub-matrix
auto A_row_map_sub = Kokkos::subview(
A_row_map,
Kokkos::make_pair(blk_row_start, blk_row_start + blk_nrows + 1));

blk_nnz = 0;

for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row
size_type k1 = A_row_map_sub(j);
size_type k2 = A_row_map_sub(j + 1);
size_type k;
// Assume column indices are sorted in ascending order
// Find the position of the start column in the row
for (k = k1; k < k2; k++) {
ordinal_type col = A_entries(k);
if (col >= blk_col_start) {
break;
}
}
first_indices(j) = k;
// Find the position of the last column in the row
for (k = k2 - 1; k >= k1; k--) {
ordinal_type col = A_entries(k);
if (col < blk_col_start + blk_ncols) {
break;
}
}
last_indices(j) = k;
blk_nnz += (last_indices(j) - first_indices(j) + 1);
}
}

/**
* @brief Extract a CRS sub-block from a CRS matrix
* This is a host function used by the
* kk_extract_diagonal_blocks_crsmatrix_sequential()
*/
template <typename entries_type, typename values_type, typename ordinal_type,
typename size_type, typename offset_view1d_type,
typename out_row_map_type, typename out_entries_type,
typename out_values_type>
void kk_extract_subblock_crsmatrix_sequential(
const entries_type &A_entries, const values_type &A_values,
const ordinal_type &blk_col_start, const ordinal_type &blk_nrows,
const size_type &blk_nnz, const offset_view1d_type &first_indices,
const offset_view1d_type &last_indices, out_row_map_type &blk_row_map,
out_entries_type &blk_entries, out_values_type &blk_values) {
// - create out_row_map
// - copy A_entries to out_entries and update out_entries with local column
// indices
// - copy A_values to out_values
size_type first_ = 0;
for (ordinal_type j = 0; j < blk_nrows; j++) { // loop through each row
size_type nnz = last_indices(j) - first_indices(j) + 1;
blk_row_map(j) = first_;
for (size_type k = 0; k < nnz; k++) {
blk_entries(first_ + k) = A_entries(first_indices(j) + k) - blk_col_start;
blk_values(first_ + k) = A_values(first_indices(j) + k);
}
first_ += nnz;
}
blk_row_map(blk_nrows) = blk_nnz; // last element
}

/**
* @brief Extract the diagonal blocks out of a crs matrix.
* This is a blocking function that runs on the host.
*
* @tparam crsMat_t The type of the CRS matrix.
* @param A [in] The square CrsMatrix. It is expected that column indices are
* in ascending order
* @param DiagBlk_v [out] The vector of the extracted the CRS diagonal blocks
* (1 <= the number of diagonal blocks <= A_nrows)
*
* Usage Example:
* kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b);
*/
template <typename crsMat_t>
void kk_extract_diagonal_blocks_crsmatrix_sequential(
const crsMat_t &A, std::vector<crsMat_t> &DiagBlk_v) {
using row_map_type = typename crsMat_t::row_map_type;
using entries_type = typename crsMat_t::index_type;
using values_type = typename crsMat_t::values_type;
using graph_t = typename crsMat_t::StaticCrsGraphType;
using out_row_map_type = typename graph_t::row_map_type::non_const_type;
using out_entries_type = typename graph_t::entries_type::non_const_type;
using out_values_type = typename crsMat_t::values_type::non_const_type;
using out_row_map_hostmirror_type = typename out_row_map_type::HostMirror;
using out_entries_hostmirror_type = typename out_entries_type::HostMirror;
using out_values_hostmirror_type = typename out_values_type::HostMirror;

using ordinal_type = typename crsMat_t::non_const_ordinal_type;
using size_type = typename crsMat_t::non_const_size_type;
using offset_view1d_type =
Kokkos::View<size_type *, Kokkos::LayoutLeft, Kokkos::HostSpace>;

row_map_type A_row_map = A.graph.row_map;
entries_type A_entries = A.graph.entries;
values_type A_values = A.values;

auto A_row_map_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_row_map);
auto A_entries_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_entries);
auto A_values_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A_values);

ordinal_type A_nrows = static_cast<ordinal_type>(A.numRows());
ordinal_type A_ncols = static_cast<ordinal_type>(A.numCols());
ordinal_type n_blocks = static_cast<ordinal_type>(DiagBlk_v.size());

if (A_nrows != A_ncols) {
std::ostringstream os;
os << "The diagonal block extraction only works with square matrices -- "
"matrix A: "
<< A_nrows << " x " << A_ncols;
throw std::runtime_error(os.str());
}

if (n_blocks == 1) {
// One block case: simply shallow copy A to DiagBlk_v[0]
DiagBlk_v[0] = crsMat_t(A);
} else {
// n_blocks > 1
if (A_nrows == 0) {
// Degenerate case: A is an empty matrix
for (ordinal_type i = 0; i < n_blocks; i++) {
DiagBlk_v[i] = crsMat_t();
}
} else {
// A_nrows >= 1
if ((n_blocks < 1) || (A_nrows < n_blocks)) {
std::ostringstream os;
os << "The number of diagonal blocks (" << n_blocks
<< ") should be >=1 and <= the number of rows of the matrix A ("
<< A_nrows << ")";
throw std::runtime_error(os.str());
}

ordinal_type rows_per_block = ((A_nrows % n_blocks) == 0)
? (A_nrows / n_blocks)
: (A_nrows / n_blocks + 1);

std::vector<out_row_map_type> row_map_v(n_blocks);
std::vector<out_entries_type> entries_v(n_blocks);
std::vector<out_values_type> values_v(n_blocks);
std::vector<out_row_map_hostmirror_type> row_map_h_v(n_blocks);
std::vector<out_entries_hostmirror_type> entries_h_v(n_blocks);
std::vector<out_values_hostmirror_type> values_h_v(n_blocks);

ordinal_type blk_row_start = 0; // first row index of i-th diagonal block
ordinal_type blk_col_start = 0; // first col index of i-th diagonal block
ordinal_type blk_nrows, blk_ncols; // Nrows, Ncols of i-th diagonal block

for (ordinal_type i = 0; i < n_blocks; i++) {
blk_nrows = rows_per_block;
if ((blk_row_start + rows_per_block) > A_nrows) {
blk_nrows = A_nrows - blk_row_start;
}
blk_col_start = blk_row_start;
blk_ncols = blk_nrows;

// First round: count i-th non-zeros or size of entries_v[i] and find
// the first and last column indices at each row
size_type blk_nnz = 0;
offset_view1d_type first("first", blk_nrows); // first position per row
offset_view1d_type last("last", blk_nrows); // last position per row

kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential(
A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows,
blk_ncols, blk_nnz, first, last);

// Second round: extract
row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1);
entries_v[i] = out_entries_type("entries_v", blk_nnz);
values_v[i] = out_values_type("values_v", blk_nnz);
row_map_h_v[i] =
out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1);
entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", blk_nnz);
values_h_v[i] = out_values_hostmirror_type("values_h_v", blk_nnz);

kk_extract_subblock_crsmatrix_sequential(
A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first,
last, row_map_h_v[i], entries_h_v[i], values_h_v[i]);

Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]);
Kokkos::deep_copy(entries_v[i], entries_h_v[i]);
Kokkos::deep_copy(values_v[i], values_h_v[i]);

DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz,
values_v[i], row_map_v[i], entries_v[i]);

blk_row_start += blk_nrows;
} // for (ordinal_type i = 0; i < n_blocks; i++)
} // A_nrows >= 1
} // n_blocks > 1
}

} // namespace Impl

using Impl::isCrsGraphSorted;
Expand Down
1 change: 1 addition & 0 deletions packages/kokkos-kernels/sparse/unit_test/Test_Sparse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "Test_Sparse_ccs2crs.hpp"
#include "Test_Sparse_crs2ccs.hpp"
#include "Test_Sparse_removeCrsMatrixZeros.hpp"
#include "Test_Sparse_extractCrsDiagonalBlocks.hpp"

// TPL specific tests, these require
// particular pairs of backend and TPL
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include "KokkosSparse_Utils.hpp"
#include "KokkosKernels_TestUtils.hpp"

namespace Test {
template <typename scalar_t, typename lno_t, typename size_type,
typename device>
void run_test_extract_diagonal_blocks(int nrows, int nblocks) {
using RowMapType = Kokkos::View<size_type *, device>;
using EntriesType = Kokkos::View<lno_t *, device>;
using ValuesType = Kokkos::View<scalar_t *, device>;
using RowMapType_hm = typename RowMapType::HostMirror;
using EntriesType_hm = typename EntriesType::HostMirror;
using ValuesType_hm = typename ValuesType::HostMirror;
using crsMat_t = CrsMatrix<scalar_t, lno_t, device, void, size_type>;

crsMat_t A;
std::vector<crsMat_t> DiagBlks(nblocks);

if (nrows != 0) {
// Generate test matrix
const size_type nnz = 2 + (nrows - 2) * 3 + 2;
RowMapType_hm hrow_map("hrow_map", nrows + 1);
EntriesType_hm hentries("hentries", nnz);
ValuesType_hm hvalues("hvalues", nnz);

// first row
hrow_map(0) = 0;
hentries(0) = 0;
hentries(1) = 1;
hvalues(0) = 0;
hvalues(1) = 1;
// rows in between
int cnt = 2;
for (int i = 1; i <= (nrows - 2); i++) {
hrow_map(i) = cnt;
hentries(cnt) = -1 + i;
hentries(cnt + 1) = 0 + i;
hentries(cnt + 2) = 1 + i;
hvalues(cnt) = -1 + i;
hvalues(cnt + 1) = 0 + i;
hvalues(cnt + 2) = 1 + i;
cnt += 3;
}
// last row
hrow_map(nrows - 1) = cnt;
hentries(nnz - 2) = nrows - 2;
hentries(nnz - 1) = nrows - 1;
hvalues(nnz - 2) = nrows - 2;
hvalues(nnz - 1) = nrows - 1;
// last element of row_map
hrow_map(nrows) = nnz;

// Allocate A on device memory
RowMapType row_map("row_map", nrows + 1);
EntriesType entries("entries", nnz);
ValuesType values("values", nnz);

// Copy from host to device
Kokkos::deep_copy(row_map, hrow_map);
Kokkos::deep_copy(entries, hentries);
Kokkos::deep_copy(values, hvalues);

// Construct a CRS matrix
A = crsMat_t("CrsMatrix", nrows, nrows, nnz, values, row_map, entries);
}

// Extract
KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A,
DiagBlks);

// Checking
lno_t numRows = 0;
lno_t numCols = 0;
for (int i = 0; i < nblocks; i++) {
numRows += DiagBlks[i].numRows();
numCols += DiagBlks[i].numCols();
}

EXPECT_TRUE(numRows == static_cast<lno_t>(nrows));
EXPECT_TRUE(numCols == static_cast<lno_t>(nrows));

if (nrows > 0) {
bool flag = true;
lno_t col_start = 0;
for (int i = 0; i < nblocks; i++) {
RowMapType_hm hrow_map_diagblk("hrow_map_diagblk",
DiagBlks[i].numRows() + 1);
EntriesType_hm hentries_diagblk("hentries_diagblk", DiagBlks[i].nnz());
ValuesType_hm hvalues_diagblk("hvalues_diagblk", DiagBlks[i].nnz());

Kokkos::deep_copy(hrow_map_diagblk, DiagBlks[i].graph.row_map);
Kokkos::deep_copy(hentries_diagblk, DiagBlks[i].graph.entries);
Kokkos::deep_copy(hvalues_diagblk, DiagBlks[i].values);

for (int j = 0; j < static_cast<int>(DiagBlks[i].numRows()); j++) {
size_type k1 = hrow_map_diagblk(j);
size_type k2 = hrow_map_diagblk(j + 1);
for (size_type k = k1; k < k2; k++) {
scalar_t col = static_cast<scalar_t>(hentries_diagblk(k) + col_start);
scalar_t val = hvalues_diagblk(k);
if (Kokkos::abs(col - val) != 0) {
flag = false;
break;
}
}
if (flag == false) break;
}
if (flag == false) break;
col_start += DiagBlks[i].numCols();
}
EXPECT_TRUE(flag);
}
}
} // namespace Test

template <typename scalar_t, typename lno_t, typename size_type,
typename device>
void test_extract_diagonal_blocks() {
for (int s = 1; s <= 8; s++) {
Test::run_test_extract_diagonal_blocks<scalar_t, lno_t, size_type, device>(
0, s);
Test::run_test_extract_diagonal_blocks<scalar_t, lno_t, size_type, device>(
12, s);
Test::run_test_extract_diagonal_blocks<scalar_t, lno_t, size_type, device>(
123, s);
}
}

#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \
TEST_F( \
TestCategory, \
sparse##_##extract_diagonal_blocks##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \
test_extract_diagonal_blocks<SCALAR, ORDINAL, OFFSET, DEVICE>(); \
}

#include <Test_Common_Test_All_Type_Combos.hpp>

#undef KOKKOSKERNELS_EXECUTE_TEST

0 comments on commit 4b9ad2b

Please sign in to comment.