From 5ef2d182dae0caa0a5ee0196507b4492a956bbc6 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Mon, 9 May 2022 14:06:52 +0200 Subject: [PATCH] extract commonly used GMRES kernels --- common/CMakeLists.txt | 1 + .../unified/solver/common_gmres_kernels.cpp | 193 +++++++++++++++ common/unified/solver/gmres_kernels.cpp | 178 +++----------- core/device_hooks/common_kernels.inc.cpp | 16 +- core/solver/cb_gmres.cpp | 16 +- core/solver/common_gmres_kernels.hpp | 101 ++++++++ core/solver/gmres.cpp | 45 ++-- core/solver/gmres_kernels.hpp | 59 ++--- cuda/base/kernel_launch.cuh | 22 ++ hip/base/kernel_launch.hip.hpp | 22 ++ reference/CMakeLists.txt | 1 + reference/solver/common_gmres_kernels.cpp | 228 ++++++++++++++++++ reference/solver/gmres_kernels.cpp | 227 ++--------------- test/solver/gmres_kernels.cpp | 75 +++--- 14 files changed, 730 insertions(+), 454 deletions(-) create mode 100644 common/unified/solver/common_gmres_kernels.cpp create mode 100644 core/solver/common_gmres_kernels.hpp create mode 100644 reference/solver/common_gmres_kernels.cpp diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 0e76757ec94..6ba28e20b8e 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -20,6 +20,7 @@ set(UNIFIED_SOURCES solver/bicgstab_kernels.cpp solver/cg_kernels.cpp solver/cgs_kernels.cpp + solver/common_gmres_kernels.cpp solver/fcg_kernels.cpp solver/gmres_kernels.cpp solver/ir_kernels.cpp diff --git a/common/unified/solver/common_gmres_kernels.cpp b/common/unified/solver/common_gmres_kernels.cpp new file mode 100644 index 00000000000..aefcb2fa279 --- /dev/null +++ b/common/unified/solver/common_gmres_kernels.cpp @@ -0,0 +1,193 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/common_gmres_kernels.hpp" + + +#include + + +#include "common/unified/base/kernel_launch.hpp" +#include "core/solver/cb_gmres_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace GKO_DEVICE_NAMESPACE { +/** + * @brief The common GMRES solver namespace. + * + * @ingroup gmres + */ +namespace common_gmres { + + +template +void initialize(std::shared_ptr exec, + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + stopping_status* stop_status) +{ + const auto krylov_dim = givens_sin->get_size()[0]; + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto b, auto residual, auto givens_sin, + auto givens_cos, auto stop_status, auto krylov_dim, + auto num_rows) { + using value_type = std::decay_t; + if (i == 0) { + stop_status[j].reset(); + } + if (i < num_rows) { + residual(i, j) = b(i, j); + } + if (i < krylov_dim) { + givens_sin(i, j) = zero(); + givens_cos(i, j) = zero(); + } + }, + dim<2>{std::max(b->get_size()[0], krylov_dim), b->get_size()[1]}, b, + residual, givens_sin, givens_cos, stop_status, krylov_dim, + b->get_size()[0]); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); + + +template +void hessenberg_qr(std::shared_ptr exec, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* hessenberg_iter, size_type iter, + size_type* final_iter_nums, + const stopping_status* stop_status) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto rhs, auto givens_sin, auto givens_cos, + auto residual_norm, auto residual_norm_collection, + auto hessenberg_iter, auto iter, auto final_iter_nums, + auto stop_status) { + using value_type = std::decay_t; + if (stop_status[rhs].has_stopped()) { + return; + } + // increment iteration count + final_iter_nums[rhs]++; + // apply previous Givens rotations to column + for (int64 j = 0; j < iter; ++j) { + auto out1 = givens_cos(j, rhs) * hessenberg_iter(j, rhs) + + givens_sin(j, rhs) * hessenberg_iter(j + 1, rhs); + auto out2 = + -conj(givens_sin(j, rhs)) * hessenberg_iter(j, rhs) + + conj(givens_cos(j, rhs)) * hessenberg_iter(j + 1, rhs); + hessenberg_iter(j, rhs) = out1; + hessenberg_iter(j + 1, rhs) = out2; + } + // compute new Givens rotation + if (hessenberg_iter(iter, rhs) == zero()) { + givens_cos(iter, rhs) = zero(); + givens_sin(iter, rhs) = one(); + } else { + const auto this_hess = hessenberg_iter(iter, rhs); + const auto next_hess = hessenberg_iter(iter + 1, rhs); + const auto scale = abs(this_hess) + abs(next_hess); + const auto hypotenuse = + scale * + sqrt(abs(this_hess / scale) * abs(this_hess / scale) + + abs(next_hess / scale) * abs(next_hess / scale)); + givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; + givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; + } + // apply new Givens rotation to column + hessenberg_iter(iter, rhs) = + givens_cos(iter, rhs) * hessenberg_iter(iter, rhs) + + givens_sin(iter, rhs) * hessenberg_iter(iter + 1, rhs); + hessenberg_iter(iter + 1, rhs) = zero(); + // apply new Givens rotation to RHS of least-squares problem + residual_norm_collection(iter + 1, rhs) = + -conj(givens_sin(iter, rhs)) * + residual_norm_collection(iter, rhs); + residual_norm_collection(iter, rhs) = + givens_cos(iter, rhs) * residual_norm_collection(iter, rhs); + residual_norm(0, rhs) = + abs(residual_norm_collection(iter + 1, rhs)); + }, + hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, + residual_norm_collection, hessenberg_iter, iter, final_iter_nums, + stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL); + + +template +void solve_krylov(std::shared_ptr exec, + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, + matrix::Dense* y, const size_type* final_iter_nums, + const stopping_status* stop_status) +{ + run_kernel( + exec, + [] GKO_KERNEL(auto col, auto rhs, auto mtx, auto y, auto sizes, + auto stop, auto num_cols) { + if (stop[col].is_finalized()) { + return; + } + for (int i = sizes[col] - 1; i >= 0; i--) { + auto value = rhs(i, col); + for (int j = i + 1; j < sizes[col]; j++) { + value -= mtx(i, j * num_cols + col) * y(j, col); + } + // y(i) = (rhs(i) - U(i,i+1:) * y(i+1:)) / U(i, i) + y(i, col) = value / mtx(i, i * num_cols + col); + } + }, + residual_norm_collection->get_size()[1], residual_norm_collection, + hessenberg, y, final_iter_nums, stop_status, + residual_norm_collection->get_size()[1]); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL); + + +} // namespace common_gmres +} // namespace GKO_DEVICE_NAMESPACE +} // namespace kernels +} // namespace gko diff --git a/common/unified/solver/gmres_kernels.cpp b/common/unified/solver/gmres_kernels.cpp index 4e7e8a91722..dcf62457011 100644 --- a/common/unified/solver/gmres_kernels.cpp +++ b/common/unified/solver/gmres_kernels.cpp @@ -1,5 +1,5 @@ /************************************************************* -Copyright (c) 2017-2021, the Ginkgo authors +Copyright (c) 2017-2022, the Ginkgo authors All rights reserved. Redistribution and use in source and binary forms, with or without @@ -51,164 +51,50 @@ namespace GKO_DEVICE_NAMESPACE { namespace gmres { -template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - Array& stop_status) -{ - const auto krylov_dim = givens_sin->get_size()[0]; - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto j, auto b, auto residual, auto givens_sin, - auto givens_cos, auto stop_status, auto krylov_dim, - auto num_rows) { - using value_type = std::decay_t; - if (i == 0) { - stop_status[j].reset(); - } - if (i < num_rows) { - residual(i, j) = b(i, j); - } - if (i < krylov_dim) { - givens_sin(i, j) = zero(); - givens_cos(i, j) = zero(); - } - }, - dim<2>{std::max(b->get_size()[0], krylov_dim), b->get_size()[1]}, b, - residual, givens_sin, givens_cos, stop_status, krylov_dim, - b->get_size()[0]); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_KERNEL); - - template void restart(std::shared_ptr exec, const matrix::Dense* residual, matrix::Dense>* residual_norm, matrix::Dense* residual_norm_collection, - matrix::Dense* krylov_bases, - Array& final_iter_nums) + matrix::Dense* krylov_bases, size_type* final_iter_nums) { - run_kernel( - exec, - [] GKO_KERNEL(auto i, auto j, auto residual, auto residual_norm, - auto residual_norm_collection, auto krylov_bases, - auto final_iter_nums) { - if (i == 0) { + if (residual->get_size()[0] == 0) { + run_kernel( + exec, + [] GKO_KERNEL(auto j, auto residual_norm, + auto residual_norm_collection, auto final_iter_nums) { residual_norm_collection(0, j) = residual_norm(0, j); final_iter_nums[j] = 0; - } - krylov_bases(i, j) = residual(i, j) / residual_norm(0, j); - }, - residual->get_size(), residual, residual_norm, residual_norm_collection, - krylov_bases, final_iter_nums); + }, + residual->get_size()[1], residual_norm, residual_norm_collection, + final_iter_nums); + } else { + run_kernel( + exec, + [] GKO_KERNEL(auto i, auto j, auto residual, auto residual_norm, + auto residual_norm_collection, auto krylov_bases, + auto final_iter_nums) { + if (i == 0) { + residual_norm_collection(0, j) = residual_norm(0, j); + final_iter_nums[j] = 0; + } + krylov_bases(i, j) = residual(i, j) / residual_norm(0, j); + }, + residual->get_size(), residual, residual_norm, + residual_norm_collection, krylov_bases, final_iter_nums); + } } GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); template -void hessenberg_qr(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* hessenberg_iter, size_type iter, - Array& final_iter_nums, - const Array& stop_status) -{ - run_kernel( - exec, - [] GKO_KERNEL(auto rhs, auto givens_sin, auto givens_cos, - auto residual_norm, auto residual_norm_collection, - auto hessenberg_iter, auto iter, auto final_iter_nums, - auto stop_status) { - using value_type = std::decay_t; - if (stop_status[rhs].has_stopped()) { - return; - } - // increment iteration count - final_iter_nums[rhs]++; - // apply previous Givens rotations to column - for (int64 j = 0; j < iter; ++j) { - auto out1 = givens_cos(j, rhs) * hessenberg_iter(j, rhs) + - givens_sin(j, rhs) * hessenberg_iter(j + 1, rhs); - auto out2 = - -conj(givens_sin(j, rhs)) * hessenberg_iter(j, rhs) + - conj(givens_cos(j, rhs)) * hessenberg_iter(j + 1, rhs); - hessenberg_iter(j, rhs) = out1; - hessenberg_iter(j + 1, rhs) = out2; - } - // compute new Givens rotation - if (hessenberg_iter(iter, rhs) == zero()) { - givens_cos(iter, rhs) = zero(); - givens_sin(iter, rhs) = one(); - } else { - const auto this_hess = hessenberg_iter(iter, rhs); - const auto next_hess = hessenberg_iter(iter + 1, rhs); - const auto scale = abs(this_hess) + abs(next_hess); - const auto hypotenuse = - scale * - sqrt(abs(this_hess / scale) * abs(this_hess / scale) + - abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin(iter, rhs) = conj(next_hess) / hypotenuse; - } - // apply new Givens rotation to column - hessenberg_iter(iter, rhs) = - givens_cos(iter, rhs) * hessenberg_iter(iter, rhs) + - givens_sin(iter, rhs) * hessenberg_iter(iter + 1, rhs); - hessenberg_iter(iter + 1, rhs) = zero(); - // apply new Givens rotation to RHS of least-squares problem - residual_norm_collection(iter + 1, rhs) = - -conj(givens_sin(iter, rhs)) * - residual_norm_collection(iter, rhs); - residual_norm_collection(iter, rhs) = - givens_cos(iter, rhs) * residual_norm_collection(iter, rhs); - residual_norm(0, rhs) = - abs(residual_norm_collection(iter + 1, rhs)); - }, - hessenberg_iter->get_size()[1], givens_sin, givens_cos, residual_norm, - residual_norm_collection, hessenberg_iter, iter, final_iter_nums, - stop_status); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_HESSENBERG_QR_KERNEL); - - -template -void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const Array& final_iter_nums, - Array& stop_status) +void multi_axpy(std::shared_ptr exec, + const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums, stopping_status* stop_status) { - run_kernel( - exec, - [] GKO_KERNEL(auto col, auto rhs, auto mtx, auto y, auto sizes, - auto stop, auto num_cols) { - if (stop[col].is_finalized()) { - return; - } - for (int i = sizes[col] - 1; i >= 0; i--) { - auto value = rhs(i, col); - for (int j = i + 1; j < sizes[col]; j++) { - value -= mtx(i, j * num_cols + col) * y(j, col); - } - // y(i) = (rhs(i) - U(i,i+1:) * y(i+1:)) / U(i, i) - y(i, col) = value / mtx(i, i * num_cols + col); - } - }, - residual_norm_collection->get_size()[1], residual_norm_collection, - hessenberg, y, final_iter_nums, stop_status, - residual_norm_collection->get_size()[1]); run_kernel( exec, [] GKO_KERNEL(auto row, auto col, auto bases, auto y, auto out, @@ -232,10 +118,10 @@ void solve_krylov(std::shared_ptr exec, stop[col].finalize(); } }, - stop_status.get_num_elems(), stop_status); + before_preconditioner->get_size()[1], stop_status); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_SOLVE_KRYLOV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); } // namespace gmres diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index ec0a5f421cf..f4c7c2491e9 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -71,6 +71,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/solver/cb_gmres_kernels.hpp" #include "core/solver/cg_kernels.hpp" #include "core/solver/cgs_kernels.hpp" +#include "core/solver/common_gmres_kernels.hpp" #include "core/solver/fcg_kernels.hpp" #include "core/solver/gmres_kernels.hpp" #include "core/solver/idr_kernels.hpp" @@ -407,13 +408,22 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_CGS_STEP_3_KERNEL); } // namespace cgs +namespace common_gmres { + + +GKO_STUB_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL); + + +} // namespace common_gmres + + namespace gmres { -GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_KERNEL); GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_HESSENBERG_QR_KERNEL); -GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_SOLVE_KRYLOV_KERNEL); +GKO_STUB_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); } // namespace gmres diff --git a/core/solver/cb_gmres.cpp b/core/solver/cb_gmres.cpp index 7886ab2ed56..3da9e9a6962 100644 --- a/core/solver/cb_gmres.cpp +++ b/core/solver/cb_gmres.cpp @@ -233,8 +233,6 @@ void CbGmres::apply_dense_impl( * - z: selects which column-element of said krylov vector should be * used */ - const auto num_rhs = dense_b->get_size()[1]; - const auto num_rows = system_matrix_->get_size()[0]; const dim<3> krylov_bases_dim{krylov_dim + 1, num_rows, num_rhs}; Range3dHelper helper(exec, krylov_bases_dim); auto krylov_bases_range = helper.get_range(); @@ -259,18 +257,18 @@ void CbGmres::apply_dense_impl( // 3rd row of arnoldi_norm: the infinity norm of next_krylov_basis // (ONLY when using a scalar accessor) auto arnoldi_norm = VectorNorms::create(exec, dim<2>{3, num_rhs}); - Array final_iter_nums(this->get_executor(), num_rhs); + array final_iter_nums(this->get_executor(), num_rhs); auto y = Vector::create(exec, dim<2>{krylov_dim, num_rhs}); bool one_changed{}; - Array stop_status(this->get_executor(), num_rhs); + array stop_status(this->get_executor(), num_rhs); // reorth_status and num_reorth are both helper variables for GPU // implementations at the moment. // num_reorth := Number of vectors which require a re-orthogonalization // reorth_status := stopping status for the re-orthogonalization, // marking which RHS requires one, and which does not - Array reorth_status(this->get_executor(), num_rhs); - Array num_reorth(this->get_executor(), 1); + array reorth_status(this->get_executor(), num_rhs); + array num_reorth(this->get_executor(), 1); // Initialization exec->run(cb_gmres::make_initialize(dense_b, residual.get(), @@ -305,9 +303,9 @@ void CbGmres::apply_dense_impl( auto after_preconditioner = matrix::Dense::create_with_config_of(dense_x); - Array stop_encountered_rhs(exec->get_master(), num_rhs); - Array fully_converged_rhs(exec->get_master(), num_rhs); - Array host_stop_status( + array stop_encountered_rhs(exec->get_master(), num_rhs); + array fully_converged_rhs(exec->get_master(), num_rhs); + array host_stop_status( this->get_executor()->get_master(), stop_status); for (size_type i = 0; i < stop_encountered_rhs.get_num_elems(); ++i) { stop_encountered_rhs.get_data()[i] = false; diff --git a/core/solver/common_gmres_kernels.hpp b/core/solver/common_gmres_kernels.hpp new file mode 100644 index 00000000000..e6ada48e68b --- /dev/null +++ b/core/solver/common_gmres_kernels.hpp @@ -0,0 +1,101 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_SOLVER_COMMON_GMRES_KERNELS_HPP_ +#define GKO_CORE_SOLVER_COMMON_GMRES_KERNELS_HPP_ + + +#include +#include +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { +namespace common_gmres { + + +#define GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL(_type) \ + void initialize( \ + std::shared_ptr exec, \ + const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ + matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ + stopping_status* stop_status) + + +#define GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL(_type) \ + void hessenberg_qr( \ + std::shared_ptr exec, \ + matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ + matrix::Dense>* residual_norm, \ + matrix::Dense<_type>* residual_norm_collection, \ + matrix::Dense<_type>* hessenberg_iter, size_type iter, \ + size_type* final_iter_nums, const stopping_status* stop_status) + + +#define GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL(_type1) \ + void solve_krylov( \ + std::shared_ptr exec, \ + const matrix::Dense<_type1>* residual_norm_collection, \ + const matrix::Dense<_type1>* hessenberg, matrix::Dense<_type1>* y, \ + const size_type* final_iter_nums, const stopping_status* stop_status) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL(ValueType); \ + template \ + GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL(ValueType); \ + template \ + GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL(ValueType) + + +} // namespace common_gmres + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(common_gmres, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_SOLVER_COMMON_GMRES_KERNELS_HPP_ diff --git a/core/solver/gmres.cpp b/core/solver/gmres.cpp index 33aeb46415d..fe1039981ea 100644 --- a/core/solver/gmres.cpp +++ b/core/solver/gmres.cpp @@ -45,6 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/solver/common_gmres_kernels.hpp" #include "core/solver/gmres_kernels.hpp" @@ -54,10 +55,11 @@ namespace gmres { namespace { -GKO_REGISTER_OPERATION(initialize, gmres::initialize); +GKO_REGISTER_OPERATION(initialize, common_gmres::initialize); GKO_REGISTER_OPERATION(restart, gmres::restart); -GKO_REGISTER_OPERATION(hessenberg_qr, gmres::hessenberg_qr); -GKO_REGISTER_OPERATION(solve_krylov, gmres::solve_krylov); +GKO_REGISTER_OPERATION(hessenberg_qr, common_gmres::hessenberg_qr); +GKO_REGISTER_OPERATION(solve_krylov, common_gmres::solve_krylov); +GKO_REGISTER_OPERATION(multi_axpy, gmres::multi_axpy); } // anonymous namespace @@ -124,8 +126,6 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, const auto num_rhs = dense_b->get_size()[1]; const auto krylov_dim = this->get_krylov_dim(); auto residual = Vector::create_with_config_of(dense_b); - const auto num_rhs = dense_b->get_size()[1]; - const auto num_rows = system_matrix_->get_size()[0]; auto krylov_bases = Vector::create_with_type_of( dense_b, exec, dim<2>{num_rows * (krylov_dim + 1), num_rhs}); std::shared_ptr> preconditioned_vector = @@ -150,7 +150,7 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // givens_sin = givens_cos = 0 // reset stop status exec->run(gmres::make_initialize(dense_b, residual.get(), givens_sin.get(), - givens_cos.get(), stop_status)); + givens_cos.get(), stop_status.get_data())); // residual = residual - Ax this->get_system_matrix()->apply(neg_one_op.get(), dense_x, one_op.get(), residual.get()); @@ -160,9 +160,9 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // residual_norm_collection = {residual_norm, unchanged} // krylov_bases(:, 1) = residual / residual_norm // final_iter_nums = {0, ..., 0} - exec->run(gmres::make_restart(residual.get(), residual_norm.get(), - residual_norm_collection.get(), - krylov_bases.get(), final_iter_nums)); + exec->run(gmres::make_restart( + residual.get(), residual_norm.get(), residual_norm_collection.get(), + krylov_bases.get(), final_iter_nums.get_data())); auto stop_criterion = this->get_stop_criterion_factory()->generate( this->get_system_matrix(), @@ -210,11 +210,14 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // Restart // Solve upper triangular. // y = hessenberg \ residual_norm_collection + exec->run(gmres::make_solve_krylov(residual_norm_collection.get(), + hessenberg.get(), y.get(), + final_iter_nums.get_const_data(), + stop_status.get_const_data())); // before_preconditioner = krylov_bases * y - exec->run(gmres::make_solve_krylov( - residual_norm_collection.get(), krylov_bases.get(), - hessenberg.get(), y.get(), before_preconditioner.get(), - final_iter_nums, stop_status)); + exec->run(gmres::make_multi_axpy( + krylov_bases.get(), y.get(), before_preconditioner.get(), + final_iter_nums.get_const_data(), stop_status.get_data())); // x = x + get_preconditioner() * before_preconditioner this->get_preconditioner()->apply(before_preconditioner.get(), @@ -232,7 +235,8 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // final_iter_nums = {0, ..., 0} exec->run(gmres::make_restart(residual.get(), residual_norm.get(), residual_norm_collection.get(), - krylov_bases.get(), final_iter_nums)); + krylov_bases.get(), + final_iter_nums.get_data())); restart_iter = 0; } auto this_krylov = krylov_bases->create_submatrix( @@ -303,12 +307,11 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, exec->run(gmres::make_hessenberg_qr( givens_sin.get(), givens_cos.get(), residual_norm.get(), residual_norm_collection.get(), hessenberg_iter.get(), restart_iter, - final_iter_nums, stop_status)); + final_iter_nums.get_data(), stop_status.get_const_data())); restart_iter++; } - // Solve x auto krylov_bases_small = krylov_bases->create_submatrix( span{0, num_rows * (restart_iter + 1)}, span{0, num_rhs}); auto hessenberg_small = hessenberg->create_submatrix( @@ -316,11 +319,13 @@ void Gmres::apply_dense_impl(const matrix::Dense* dense_b, // Solve upper triangular. // y = hessenberg \ residual_norm_collection - // before_preconditioner = krylov_bases * y exec->run(gmres::make_solve_krylov( - residual_norm_collection.get(), krylov_bases_small.get(), - hessenberg_small.get(), y.get(), before_preconditioner.get(), - final_iter_nums, stop_status)); + residual_norm_collection.get(), hessenberg_small.get(), y.get(), + final_iter_nums.get_const_data(), stop_status.get_const_data())); + // before_preconditioner = krylov_bases * y + exec->run(gmres::make_multi_axpy( + krylov_bases_small.get(), y.get(), before_preconditioner.get(), + final_iter_nums.get_const_data(), stop_status.get_data())); // x = x + get_preconditioner() * before_preconditioner this->get_preconditioner()->apply(before_preconditioner.get(), diff --git a/core/solver/gmres_kernels.hpp b/core/solver/gmres_kernels.hpp index 91e6f742c82..3d82a96c7e9 100644 --- a/core/solver/gmres_kernels.hpp +++ b/core/solver/gmres_kernels.hpp @@ -49,54 +49,29 @@ namespace kernels { namespace gmres { -#define GKO_DECLARE_GMRES_INITIALIZE_KERNEL(_type) \ - void initialize( \ - std::shared_ptr exec, \ - const matrix::Dense<_type>* b, matrix::Dense<_type>* residual, \ - matrix::Dense<_type>* givens_sin, matrix::Dense<_type>* givens_cos, \ - array& stop_status) - - #define GKO_DECLARE_GMRES_RESTART_KERNEL(_type) \ void restart(std::shared_ptr exec, \ const matrix::Dense<_type>* residual, \ matrix::Dense>* residual_norm, \ matrix::Dense<_type>* residual_norm_collection, \ matrix::Dense<_type>* krylov_bases, \ - array& final_iter_nums) - - -#define GKO_DECLARE_GMRES_HESSENBERG_QR_KERNEL(_type) \ - void hessenberg_qr(std::shared_ptr exec, \ - matrix::Dense<_type>* givens_sin, \ - matrix::Dense<_type>* givens_cos, \ - matrix::Dense>* residual_norm, \ - matrix::Dense<_type>* residual_norm_collection, \ - matrix::Dense<_type>* hessenberg_iter, size_type iter, \ - array& final_iter_nums, \ - const array& stop_status) - - -#define GKO_DECLARE_GMRES_SOLVE_KRYLOV_KERNEL(_type) \ - void solve_krylov(std::shared_ptr exec, \ - const matrix::Dense<_type>* residual_norm_collection, \ - const matrix::Dense<_type>* krylov_bases, \ - const matrix::Dense<_type>* hessenberg, \ - matrix::Dense<_type>* y, \ - matrix::Dense<_type>* before_preconditioner, \ - const array& final_iter_nums, \ - array& stop_status) - - -#define GKO_DECLARE_ALL_AS_TEMPLATES \ - template \ - GKO_DECLARE_GMRES_INITIALIZE_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GMRES_HESSENBERG_QR_KERNEL(ValueType); \ - template \ - GKO_DECLARE_GMRES_SOLVE_KRYLOV_KERNEL(ValueType) + size_type* final_iter_nums) + + +#define GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(_type) \ + void multi_axpy(std::shared_ptr exec, \ + const matrix::Dense<_type>* krylov_bases, \ + const matrix::Dense<_type>* y, \ + matrix::Dense<_type>* before_preconditioner, \ + const size_type* final_iter_nums, \ + stopping_status* stop_status) + + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_GMRES_RESTART_KERNEL(ValueType); \ + template \ + GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL(ValueType) } // namespace gmres diff --git a/cuda/base/kernel_launch.cuh b/cuda/base/kernel_launch.cuh index 257948b3d4d..d406e91e40a 100644 --- a/cuda/base/kernel_launch.cuh +++ b/cuda/base/kernel_launch.cuh @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "accessor/cuda_helper.hpp" #include "cuda/base/types.hpp" #include "cuda/components/thread_ids.cuh" @@ -48,6 +49,27 @@ namespace kernels { namespace cuda { +template +struct to_device_type_impl&> { + using type = std::decay_t>()))>; + static type map_to_device(gko::acc::range& range) + { + return gko::acc::as_cuda_range(range); + } +}; + +template +struct to_device_type_impl&> { + using type = std::decay_t>()))>; + static type map_to_device(const gko::acc::range& range) + { + return gko::acc::as_cuda_range(range); + } +}; + + namespace device_std = thrust; diff --git a/hip/base/kernel_launch.hip.hpp b/hip/base/kernel_launch.hip.hpp index 7d5c0a33303..cf16b20e556 100644 --- a/hip/base/kernel_launch.hip.hpp +++ b/hip/base/kernel_launch.hip.hpp @@ -40,6 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "accessor/hip_helper.hpp" #include "hip/base/types.hip.hpp" #include "hip/components/thread_ids.hip.hpp" @@ -49,6 +50,27 @@ namespace kernels { namespace hip { +template +struct to_device_type_impl&> { + using type = std::decay_t>()))>; + static type map_to_device(gko::acc::range& range) + { + return gko::acc::as_hip_range(range); + } +}; + +template +struct to_device_type_impl&> { + using type = std::decay_t>()))>; + static type map_to_device(const gko::acc::range& range) + { + return gko::acc::as_hip_range(range); + } +}; + + namespace device_std = thrust; diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index c3bcd42cf43..a62d78dc18d 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -40,6 +40,7 @@ target_sources(ginkgo_reference solver/fcg_kernels.cpp solver/gmres_kernels.cpp solver/cb_gmres_kernels.cpp + solver/common_gmres_kernels.cpp solver/idr_kernels.cpp solver/ir_kernels.cpp solver/lower_trs_kernels.cpp diff --git a/reference/solver/common_gmres_kernels.cpp b/reference/solver/common_gmres_kernels.cpp new file mode 100644 index 00000000000..12363888f02 --- /dev/null +++ b/reference/solver/common_gmres_kernels.cpp @@ -0,0 +1,228 @@ +/************************************************************* +Copyright (c) 2017-2022, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/solver/common_gmres_kernels.hpp" + + +#include +#include +#include +#include +#include +#include + + +#include "core/solver/cb_gmres_kernels.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The GMRES solver namespace. + * + * @ingroup gmres + */ +namespace common_gmres { + + +namespace { + + +template +void calculate_sin_and_cos(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, + size_type iter, const size_type rhs) +{ + if (is_zero(hessenberg_iter->at(iter, rhs))) { + givens_cos->at(iter, rhs) = zero(); + givens_sin->at(iter, rhs) = one(); + } else { + auto this_hess = hessenberg_iter->at(iter, rhs); + auto next_hess = hessenberg_iter->at(iter + 1, rhs); + const auto scale = abs(this_hess) + abs(next_hess); + const auto hypotenuse = + scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + + abs(next_hess / scale) * abs(next_hess / scale)); + givens_cos->at(iter, rhs) = conj(this_hess) / hypotenuse; + givens_sin->at(iter, rhs) = conj(next_hess) / hypotenuse; + } +} + + +template +void givens_rotation(matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense* hessenberg_iter, size_type iter, + const stopping_status* stop_status) +{ + for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { + if (stop_status[i].has_stopped()) { + continue; + } + for (size_type j = 0; j < iter; ++j) { + auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + + givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); + hessenberg_iter->at(j + 1, i) = + -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + + conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); + hessenberg_iter->at(j, i) = temp; + // temp = cos(j)*hessenberg(j) + + // sin(j)*hessenberg(j+1) + // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + + // conj(cos(j))*hessenberg(j+1) + // hessenberg(j) = temp; + } + + calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); + + hessenberg_iter->at(iter, i) = + givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + + givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); + hessenberg_iter->at(iter + 1, i) = zero(); + // hessenberg(iter) = cos(iter)*hessenberg(iter) + + // sin(iter)*hessenberg(iter + 1) + // hessenberg(iter+1) = 0 + } +} + + +template +void calculate_next_residual_norm( + matrix::Dense* givens_sin, matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, size_type iter, + const stopping_status* stop_status) +{ + for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { + if (stop_status[i].has_stopped()) { + continue; + } + residual_norm_collection->at(iter + 1, i) = + -conj(givens_sin->at(iter, i)) * + residual_norm_collection->at(iter, i); + residual_norm_collection->at(iter, i) = + givens_cos->at(iter, i) * residual_norm_collection->at(iter, i); + residual_norm->at(0, i) = + abs(residual_norm_collection->at(iter + 1, i)); + } +} + + +} // namespace + + +template +void initialize(std::shared_ptr exec, + const matrix::Dense* b, + matrix::Dense* residual, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + stopping_status* stop_status) +{ + const auto krylov_dim = givens_sin->get_size()[0]; + using NormValueType = remove_complex; + for (size_type j = 0; j < b->get_size()[1]; ++j) { + for (size_type i = 0; i < b->get_size()[0]; ++i) { + residual->at(i, j) = b->at(i, j); + } + for (size_type i = 0; i < krylov_dim; ++i) { + givens_sin->at(i, j) = zero(); + givens_cos->at(i, j) = zero(); + } + stop_status[j].reset(); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_COMMON_GMRES_INITIALIZE_KERNEL); + + +template +void hessenberg_qr(std::shared_ptr exec, + matrix::Dense* givens_sin, + matrix::Dense* givens_cos, + matrix::Dense>* residual_norm, + matrix::Dense* residual_norm_collection, + matrix::Dense* hessenberg_iter, size_type iter, + size_type* final_iter_nums, + const stopping_status* stop_status) +{ + for (size_type i = 0; i < givens_sin->get_size()[1]; ++i) { + if (!stop_status[i].has_stopped()) { + final_iter_nums[i]++; + } + } + + givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, stop_status); + calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, + residual_norm_collection, iter, stop_status); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_COMMON_GMRES_HESSENBERG_QR_KERNEL); + + +template +void solve_krylov(std::shared_ptr exec, + const matrix::Dense* residual_norm_collection, + const matrix::Dense* hessenberg, + matrix::Dense* y, const size_type* final_iter_nums, + const stopping_status* stop_status) +{ + for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { + if (stop_status[k].is_finalized()) { + continue; + } + for (int i = final_iter_nums[k] - 1; i >= 0; --i) { + auto temp = residual_norm_collection->at(i, k); + for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { + temp -= + hessenberg->at( + i, j * residual_norm_collection->get_size()[1] + k) * + y->at(j, k); + } + y->at(i, k) = + temp / hessenberg->at( + i, i * residual_norm_collection->get_size()[1] + k); + } + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE( + GKO_DECLARE_COMMON_GMRES_SOLVE_KRYLOV_KERNEL); + + +} // namespace common_gmres +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/solver/gmres_kernels.cpp b/reference/solver/gmres_kernels.cpp index 76b641bb307..eaffcea7d3c 100644 --- a/reference/solver/gmres_kernels.cpp +++ b/reference/solver/gmres_kernels.cpp @@ -52,176 +52,12 @@ namespace reference { namespace gmres { -namespace { - - -template -void calculate_sin_and_cos(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, - size_type iter, const size_type rhs) -{ - if (is_zero(hessenberg_iter->at(iter, rhs))) { - givens_cos->at(iter, rhs) = zero(); - givens_sin->at(iter, rhs) = one(); - } else { - auto this_hess = hessenberg_iter->at(iter, rhs); - auto next_hess = hessenberg_iter->at(iter + 1, rhs); - const auto scale = abs(this_hess) + abs(next_hess); - const auto hypotenuse = - scale * sqrt(abs(this_hess / scale) * abs(this_hess / scale) + - abs(next_hess / scale) * abs(next_hess / scale)); - givens_cos->at(iter, rhs) = conj(this_hess) / hypotenuse; - givens_sin->at(iter, rhs) = conj(next_hess) / hypotenuse; - } -} - - -template -void givens_rotation(matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense* hessenberg_iter, size_type iter, - const stopping_status* stop_status) -{ - for (size_type i = 0; i < hessenberg_iter->get_size()[1]; ++i) { - if (stop_status[i].has_stopped()) { - continue; - } - for (size_type j = 0; j < iter; ++j) { - auto temp = givens_cos->at(j, i) * hessenberg_iter->at(j, i) + - givens_sin->at(j, i) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j + 1, i) = - -conj(givens_sin->at(j, i)) * hessenberg_iter->at(j, i) + - conj(givens_cos->at(j, i)) * hessenberg_iter->at(j + 1, i); - hessenberg_iter->at(j, i) = temp; - // temp = cos(j)*hessenberg(j) + - // sin(j)*hessenberg(j+1) - // hessenberg(j+1) = -conj(sin(j))*hessenberg(j) + - // conj(cos(j))*hessenberg(j+1) - // hessenberg(j) = temp; - } - - calculate_sin_and_cos(givens_sin, givens_cos, hessenberg_iter, iter, i); - - hessenberg_iter->at(iter, i) = - givens_cos->at(iter, i) * hessenberg_iter->at(iter, i) + - givens_sin->at(iter, i) * hessenberg_iter->at(iter + 1, i); - hessenberg_iter->at(iter + 1, i) = zero(); - // hessenberg(iter) = cos(iter)*hessenberg(iter) + - // sin(iter)*hessenberg(iter + 1) - // hessenberg(iter+1) = 0 - } -} - - -template -void calculate_next_residual_norm( - matrix::Dense* givens_sin, matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, size_type iter, - const stopping_status* stop_status) -{ - for (size_type i = 0; i < residual_norm->get_size()[1]; ++i) { - if (stop_status[i].has_stopped()) { - continue; - } - residual_norm_collection->at(iter + 1, i) = - -conj(givens_sin->at(iter, i)) * - residual_norm_collection->at(iter, i); - residual_norm_collection->at(iter, i) = - givens_cos->at(iter, i) * residual_norm_collection->at(iter, i); - residual_norm->at(0, i) = - abs(residual_norm_collection->at(iter + 1, i)); - } -} - - -template -void solve_upper_triangular( - const matrix::Dense* residual_norm_collection, - const matrix::Dense* hessenberg, matrix::Dense* y, - const size_type* final_iter_nums, const stopping_status* stop_status) -{ - for (size_type k = 0; k < residual_norm_collection->get_size()[1]; ++k) { - if (stop_status[k].is_finalized()) { - continue; - } - for (int i = final_iter_nums[k] - 1; i >= 0; --i) { - auto temp = residual_norm_collection->at(i, k); - for (size_type j = i + 1; j < final_iter_nums[k]; ++j) { - temp -= - hessenberg->at( - i, j * residual_norm_collection->get_size()[1] + k) * - y->at(j, k); - } - y->at(i, k) = - temp / hessenberg->at( - i, i * residual_norm_collection->get_size()[1] + k); - } - } -} - - -template -void calculate_qy(const matrix::Dense* krylov_bases, - const matrix::Dense* y, - matrix::Dense* before_preconditioner, - const size_type* final_iter_nums, - stopping_status* stop_status) -{ - const auto krylov_bases_rowoffset = before_preconditioner->get_size()[0]; - for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { - if (stop_status[k].is_finalized()) { - continue; - } - for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { - before_preconditioner->at(i, k) = zero(); - for (size_type j = 0; j < final_iter_nums[k]; ++j) { - before_preconditioner->at(i, k) += - krylov_bases->at(i + j * krylov_bases_rowoffset, k) * - y->at(j, k); - } - } - stop_status[k].finalize(); - } -} - - -} // namespace - - -template -void initialize(std::shared_ptr exec, - const matrix::Dense* b, - matrix::Dense* residual, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - array& stop_status) -{ - const auto krylov_dim = givens_sin->get_size()[0]; - using NormValueType = remove_complex; - for (size_type j = 0; j < b->get_size()[1]; ++j) { - for (size_type i = 0; i < b->get_size()[0]; ++i) { - residual->at(i, j) = b->at(i, j); - } - for (size_type i = 0; i < krylov_dim; ++i) { - givens_sin->at(i, j) = zero(); - givens_cos->at(i, j) = zero(); - } - stop_status.get_data()[j].reset(); - } -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_INITIALIZE_KERNEL); - - template void restart(std::shared_ptr exec, const matrix::Dense* residual, matrix::Dense>* residual_norm, matrix::Dense* residual_norm_collection, - matrix::Dense* krylov_bases, - array& final_iter_nums) + matrix::Dense* krylov_bases, size_type* final_iter_nums) { for (size_type j = 0; j < residual->get_size()[1]; ++j) { // Calculate residual norm @@ -235,7 +71,7 @@ void restart(std::shared_ptr exec, krylov_bases->at(i, j) = residual->at(i, j) / residual_norm->at(0, j); } - final_iter_nums.get_data()[j] = 0; + final_iter_nums[j] = 0; } } @@ -243,49 +79,32 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_RESTART_KERNEL); template -void hessenberg_qr(std::shared_ptr exec, - matrix::Dense* givens_sin, - matrix::Dense* givens_cos, - matrix::Dense>* residual_norm, - matrix::Dense* residual_norm_collection, - matrix::Dense* hessenberg_iter, size_type iter, - array& final_iter_nums, - const array& stop_status) +void multi_axpy(std::shared_ptr exec, + const matrix::Dense* krylov_bases, + const matrix::Dense* y, + matrix::Dense* before_preconditioner, + const size_type* final_iter_nums, stopping_status* stop_status) { - for (size_type i = 0; i < final_iter_nums.get_num_elems(); ++i) { - if (!stop_status.get_const_data()[i].has_stopped()) { - final_iter_nums.get_data()[i]++; + const auto krylov_bases_rowoffset = before_preconditioner->get_size()[0]; + for (size_type k = 0; k < before_preconditioner->get_size()[1]; ++k) { + if (stop_status[k].is_finalized()) { + continue; + } + for (size_type i = 0; i < before_preconditioner->get_size()[0]; ++i) { + before_preconditioner->at(i, k) = zero(); + for (size_type j = 0; j < final_iter_nums[k]; ++j) { + before_preconditioner->at(i, k) += + krylov_bases->at(i + j * krylov_bases_rowoffset, k) * + y->at(j, k); + } + } + if (stop_status[k].has_stopped()) { + stop_status[k].finalize(); } } - - givens_rotation(givens_sin, givens_cos, hessenberg_iter, iter, - stop_status.get_const_data()); - calculate_next_residual_norm(givens_sin, givens_cos, residual_norm, - residual_norm_collection, iter, - stop_status.get_const_data()); -} - -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_HESSENBERG_QR_KERNEL); - - -template -void solve_krylov(std::shared_ptr exec, - const matrix::Dense* residual_norm_collection, - const matrix::Dense* krylov_bases, - const matrix::Dense* hessenberg, - matrix::Dense* y, - matrix::Dense* before_preconditioner, - const array& final_iter_nums, - array& stop_status) -{ - solve_upper_triangular(residual_norm_collection, hessenberg, y, - final_iter_nums.get_const_data(), - stop_status.get_const_data()); - calculate_qy(krylov_bases, y, before_preconditioner, - final_iter_nums.get_const_data(), stop_status.get_data()); } -GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_SOLVE_KRYLOV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_GMRES_MULTI_AXPY_KERNEL); } // namespace gmres diff --git a/test/solver/gmres_kernels.cpp b/test/solver/gmres_kernels.cpp index 41b32d1609c..3e0ce136717 100644 --- a/test/solver/gmres_kernels.cpp +++ b/test/solver/gmres_kernels.cpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "core/solver/common_gmres_kernels.hpp" #include "core/solver/gmres_kernels.hpp" #include "core/test/utils.hpp" #include "test/utils/executor.hpp" @@ -167,8 +168,8 @@ class Gmres : public ::testing::Test { std::default_random_engine rand_engine; - std::unique_ptr mtx; - std::unique_ptr d_mtx; + std::shared_ptr mtx; + std::shared_ptr d_mtx; std::unique_ptr d_gmres_factory; std::unique_ptr ref_gmres_factory; @@ -208,12 +209,12 @@ TEST_F(Gmres, GmresKernelInitializeIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::gmres::initialize(ref, b.get(), residual.get(), - givens_sin.get(), - givens_cos.get(), stop_status); - gko::kernels::EXEC_NAMESPACE::gmres::initialize( + gko::kernels::reference::common_gmres::initialize( + ref, b.get(), residual.get(), givens_sin.get(), givens_cos.get(), + stop_status.get_data()); + gko::kernels::EXEC_NAMESPACE::common_gmres::initialize( exec, d_b.get(), d_residual.get(), d_givens_sin.get(), - d_givens_cos.get(), d_stop_status); + d_givens_cos.get(), d_stop_status.get_data()); GKO_ASSERT_MTX_NEAR(d_residual, residual, r::value); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); @@ -230,11 +231,12 @@ TEST_F(Gmres, GmresKernelRestartIsEquivalentToRef) gko::kernels::reference::gmres::restart( ref, residual.get(), residual_norm.get(), - residual_norm_collection.get(), krylov_bases.get(), final_iter_nums); + residual_norm_collection.get(), krylov_bases.get(), + final_iter_nums.get_data()); gko::kernels::EXEC_NAMESPACE::gmres::restart( exec, d_residual.get(), d_residual_norm.get(), d_residual_norm_collection.get(), d_krylov_bases.get(), - d_final_iter_nums); + d_final_iter_nums.get_data()); GKO_ASSERT_MTX_NEAR(d_residual_norm, residual_norm, r::value); GKO_ASSERT_MTX_NEAR(d_residual_norm_collection, residual_norm_collection, @@ -249,14 +251,14 @@ TEST_F(Gmres, GmresKernelHessenbergQRIsEquivalentToRef) initialize_data(); int iter = 5; - gko::kernels::reference::gmres::hessenberg_qr( + gko::kernels::reference::common_gmres::hessenberg_qr( ref, givens_sin.get(), givens_cos.get(), residual_norm.get(), residual_norm_collection.get(), hessenberg_iter.get(), iter, - final_iter_nums, stop_status); - gko::kernels::EXEC_NAMESPACE::gmres::hessenberg_qr( + final_iter_nums.get_data(), stop_status.get_const_data()); + gko::kernels::EXEC_NAMESPACE::common_gmres::hessenberg_qr( exec, d_givens_sin.get(), d_givens_cos.get(), d_residual_norm.get(), d_residual_norm_collection.get(), d_hessenberg_iter.get(), iter, - d_final_iter_nums, d_stop_status); + d_final_iter_nums.get_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); @@ -275,14 +277,14 @@ TEST_F(Gmres, GmresKernelHessenbergQROnSingleRHSIsEquivalentToRef) initialize_data(1); int iter = 5; - gko::kernels::reference::gmres::hessenberg_qr( + gko::kernels::reference::common_gmres::hessenberg_qr( ref, givens_sin.get(), givens_cos.get(), residual_norm.get(), residual_norm_collection.get(), hessenberg_iter.get(), iter, - final_iter_nums, stop_status); - gko::kernels::EXEC_NAMESPACE::gmres::hessenberg_qr( + final_iter_nums.get_data(), stop_status.get_const_data()); + gko::kernels::EXEC_NAMESPACE::common_gmres::hessenberg_qr( exec, d_givens_sin.get(), d_givens_cos.get(), d_residual_norm.get(), d_residual_norm_collection.get(), d_hessenberg_iter.get(), iter, - d_final_iter_nums, d_stop_status); + d_final_iter_nums.get_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_givens_sin, givens_sin, r::value); GKO_ASSERT_MTX_NEAR(d_givens_cos, givens_cos, r::value); @@ -300,17 +302,30 @@ TEST_F(Gmres, GmresKernelSolveKrylovIsEquivalentToRef) { initialize_data(); - gko::kernels::reference::gmres::solve_krylov( - ref, residual_norm_collection.get(), krylov_bases.get(), - hessenberg.get(), y.get(), before_preconditioner.get(), final_iter_nums, - stop_status); - gko::kernels::EXEC_NAMESPACE::gmres::solve_krylov( - exec, d_residual_norm_collection.get(), d_krylov_bases.get(), - d_hessenberg.get(), d_y.get(), d_before_preconditioner.get(), - d_final_iter_nums, d_stop_status); + gko::kernels::reference::common_gmres::solve_krylov( + ref, residual_norm_collection.get(), hessenberg.get(), y.get(), + final_iter_nums.get_const_data(), stop_status.get_const_data()); + gko::kernels::EXEC_NAMESPACE::common_gmres::solve_krylov( + exec, d_residual_norm_collection.get(), d_hessenberg.get(), d_y.get(), + d_final_iter_nums.get_const_data(), d_stop_status.get_const_data()); GKO_ASSERT_MTX_NEAR(d_y, y, r::value); - GKO_ASSERT_MTX_NEAR(d_x, x, r::value); +} + + +TEST_F(Gmres, GmresKernelMultiAxpyIsEquivalentToRef) +{ + initialize_data(); + + gko::kernels::reference::gmres::multi_axpy( + ref, krylov_bases.get(), y.get(), before_preconditioner.get(), + final_iter_nums.get_const_data(), stop_status.get_data()); + gko::kernels::EXEC_NAMESPACE::gmres::multi_axpy( + exec, d_krylov_bases.get(), d_y.get(), d_before_preconditioner.get(), + d_final_iter_nums.get_const_data(), d_stop_status.get_data()); + + GKO_ASSERT_MTX_NEAR(d_before_preconditioner, before_preconditioner, + r::value); GKO_ASSERT_ARRAY_EQ(stop_status, d_stop_status); } @@ -319,8 +334,8 @@ TEST_F(Gmres, GmresApplyOneRHSIsEquivalentToRef) { int m = 123; int n = 1; - auto ref_solver = ref_gmres_factory->generate(gko::share(mtx)); - auto d_solver = d_gmres_factory->generate(gko::share(d_mtx)); + auto ref_solver = ref_gmres_factory->generate(mtx); + auto d_solver = d_gmres_factory->generate(d_mtx); auto b = gen_mtx(m, n); auto x = gen_mtx(m, n); auto d_b = Mtx::create(exec); @@ -340,8 +355,8 @@ TEST_F(Gmres, GmresApplyMultipleRHSIsEquivalentToRef) { int m = 123; int n = 5; - auto ref_solver = ref_gmres_factory->generate(gko::share(mtx)); - auto omp_solver = d_gmres_factory->generate(gko::share(d_mtx)); + auto ref_solver = ref_gmres_factory->generate(mtx); + auto omp_solver = d_gmres_factory->generate(d_mtx); auto b = gen_mtx(m, n); auto x = gen_mtx(m, n); auto d_b = Mtx::create(exec);