Adds distributed support for several solvers #976

Merged

merged 40 commits on Sep 28, 2022
Changes from 29 commits
Commits (40)
6b57030
fix cmake after rebase
MarcelKoch Aug 16, 2022
0145f14
adds non const real_view to distributed vector
MarcelKoch Apr 21, 2022
c0f9289
adds tmp array to compute_squared_norm2
MarcelKoch Apr 21, 2022
33df1cf
adds reduction with tmp array to distributed vector
MarcelKoch Apr 21, 2022
524fc39
adds dispatch for distributed vector
MarcelKoch Feb 23, 2022
cd729de
adds helper functions to access local data of dense/dist::vector
MarcelKoch Feb 23, 2022
7784dc4
adds create_with_config_of and get_stride to distributed vector
MarcelKoch Jul 11, 2022
9885d58
adds distributed capabilities to some solvers
MarcelKoch Feb 23, 2022
55e7b40
add distributed dispatch to residual norm criteria
MarcelKoch Feb 23, 2022
38ccce9
adds distributed solver example
MarcelKoch Feb 23, 2022
f5e1e56
small rename
MarcelKoch Feb 24, 2022
0ff3f0f
add distributed dispatch to identity
MarcelKoch Feb 24, 2022
de54da6
add generic distributed solver tests
MarcelKoch Feb 24, 2022
b9351a5
fixes residual norm dispatch
MarcelKoch Feb 24, 2022
370ca74
adds mixed + complex apply to solver tests
MarcelKoch Feb 25, 2022
bc05ee6
adds complex-to-real dispatch for distributed
MarcelKoch Feb 25, 2022
8f5e6c7
fixes non-mpi residual norm dispatch
MarcelKoch Mar 1, 2022
822646c
adds precision dispatch to distributed matrix apply
MarcelKoch Apr 22, 2022
b3f99fb
fix formatting
MarcelKoch Apr 25, 2022
d6ad6c5
review updates
MarcelKoch May 5, 2022
c8d5b2f
Format files
ginkgo-bot May 5, 2022
549cf9d
review updates
MarcelKoch May 5, 2022
4c4ec62
review updates
MarcelKoch May 9, 2022
c134687
add i_send/i_recv with datatypes
MarcelKoch May 11, 2022
4182437
use template vector type for Idr iterate
MarcelKoch May 5, 2022
1c114ff
use device allocation mode and disable device reset for distributed t…
MarcelKoch May 23, 2022
8c3add8
Format files
ginkgo-bot Jun 14, 2022
8c3e791
Format files
ginkgo-bot Jul 12, 2022
a6d00b7
fixes matrix's copy and move assignment
MarcelKoch Aug 24, 2022
1d8d148
adds distributed example kind
MarcelKoch Aug 26, 2022
af1e7ca
removes template apply_impl of Bicg
MarcelKoch Aug 26, 2022
c05f6a1
review updates:
MarcelKoch Aug 26, 2022
0f58c62
Format files
ginkgo-bot Aug 26, 2022
f504af1
fixes residual_norm precision dispatch for non-mpi
MarcelKoch Sep 19, 2022
6dc6667
adds test with different partition types
MarcelKoch Sep 19, 2022
33fd976
removes special case if no non-local matrix
MarcelKoch Sep 20, 2022
be0983f
frees mpi request and makes it move-only
MarcelKoch Sep 20, 2022
06f9221
review updates:
MarcelKoch Sep 26, 2022
cae4b88
Format files
ginkgo-bot Sep 26, 2022
8113be4
Merge branch 'distributed-develop' into distributed-solvers
MarcelKoch Sep 27, 2022
18 changes: 15 additions & 3 deletions cmake/create_test.cmake
@@ -9,6 +9,15 @@ function(ginkgo_build_test_name test_name target_name)
set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE)
endfunction(ginkgo_build_test_name)

function(ginkgo_create_gtest_mpi_main)
add_library(gtest_mpi_main "")
target_sources(gtest_mpi_main
PRIVATE
${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp)
find_package(MPI REQUIRED)
target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX)
endfunction(ginkgo_create_gtest_mpi_main)

## Set up shared target properties and handle ADDITIONAL_LIBRARIES/ADDITIONAL_INCLUDES
## `MPI_SIZE size` causes the tests to be run with `size` MPI processes.
function(ginkgo_set_test_target_properties test_target_name)
@@ -23,6 +32,9 @@ function(ginkgo_set_test_target_properties test_target_name)
target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}")
endif()
if (set_properties_MPI_SIZE)
if(NOT TARGET gtest_mpi_main)
ginkgo_create_gtest_mpi_main()
endif()
set(gtest_main gtest_mpi_main MPI::MPI_CXX)
else()
set(gtest_main GTest::Main)
@@ -80,7 +92,7 @@ function(ginkgo_create_dpcpp_test test_name)
target_compile_features(${test_target_name} PUBLIC cxx_std_17)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS})
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
if (MKL_ENV)
@@ -115,7 +127,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_cuda_test_internal)

@@ -205,7 +217,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec)
target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1)
target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
endif()
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_common_test_internal)

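Note on usage: the new gtest_mpi_main target is only created on demand, the first time a test passes MPI_SIZE. A minimal sketch of how a test might opt in, assuming the usual ginkgo_create_test entry point (the test name and rank count below are illustrative, not part of this diff):

    # hypothetical test registration: MPI_SIZE makes
    # ginkgo_set_test_target_properties link gtest_mpi_main and MPI::MPI_CXX
    # instead of GTest::Main, and the test is run with 4 MPI processes
    ginkgo_create_test(vector_assembly MPI_SIZE 4)
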
7 changes: 4 additions & 3 deletions common/unified/matrix/dense_kernels.cpp
@@ -383,13 +383,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
template <typename ValueType>
void compute_squared_norm2(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* x,
matrix::Dense<remove_complex<ValueType>>* result)
matrix::Dense<remove_complex<ValueType>>* result,
array<char>& tmp)
{
run_kernel_col_reduction(
run_kernel_col_reduction_cached(
exec,
[] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); },
GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
x->get_size(), x);
x->get_size(), tmp, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
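The extra array<char>& tmp parameter and the switch to run_kernel_col_reduction_cached let the caller own the scratch memory used by the column reduction, so repeated norm computations can reuse a single allocation. A hedged sketch of the intended call pattern, assuming a compute_norm2 overload that forwards such a workspace (the loop and names are illustrative, not taken from this diff):

    // hypothetical reuse of one scratch buffer across solver iterations
    gko::array<char> workspace{exec};
    for (int it = 0; it < max_iters; ++it) {
        // ... update the dense vector x ...
        x->compute_norm2(norm.get(), workspace);  // reuses workspace instead of reallocating
    }
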
101 changes: 101 additions & 0 deletions core/distributed/helpers.hpp
@@ -0,0 +1,101 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <memory>


#include <ginkgo/config.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/dense.hpp>


namespace gko {
namespace detail {


template <typename ValueType>
std::unique_ptr<matrix::Dense<ValueType>> create_with_config_of(
const matrix::Dense<ValueType>* mtx)
{
return matrix::Dense<ValueType>::create(mtx->get_executor(),
mtx->get_size(), mtx->get_stride());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(const matrix::Dense<ValueType>* mtx)
{
return mtx;
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(matrix::Dense<ValueType>* mtx)
{
return mtx;
}


#if GINKGO_BUILD_MPI


template <typename ValueType>
std::unique_ptr<distributed::Vector<ValueType>> create_with_config_of(
const distributed::Vector<ValueType>* mtx)
{
return distributed::Vector<ValueType>::create(
mtx->get_executor(), mtx->get_communicator(), mtx->get_size(),
mtx->get_local_vector()->get_size(),
mtx->get_local_vector()->get_stride());
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(distributed::Vector<ValueType>* mtx)
{
return const_cast<matrix::Dense<ValueType>*>(mtx->get_local_vector());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(
const distributed::Vector<ValueType>* mtx)
{
return mtx->get_local_vector();
}


#endif


} // namespace detail
} // namespace gko
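A hedged illustration of why these overloads exist: code templated on the vector type can create workspace vectors and reach the rank-local data uniformly, whether it receives a gko::matrix::Dense (non-distributed case) or a gko::distributed::Vector. The helper below is invented for illustration only:

    // illustrative only, not part of the diff
    template <typename VectorType>
    void prepare_workspace(const VectorType* b)
    {
        // same size (and local size/stride) as b, for Dense and distributed::Vector alike
        auto workspace = gko::detail::create_with_config_of(b);
        // rank-local values as a plain Dense view, usable by shared-memory kernels
        auto local_b = gko::detail::get_local(b);
        // ... run local kernels on local_b, writing into get_local(workspace.get()) ...
    }
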
112 changes: 58 additions & 54 deletions core/distributed/matrix.cpp
@@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/distributed/matrix.hpp>


#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/csr.hpp>

@@ -297,65 +298,70 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* b, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto dense_b, auto dense_x) {
auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
},
b, x);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
const auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
auto local_alpha = as<local_vector_type>(alpha);
auto local_beta = as<local_vector_type>(beta);
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto local_alpha, const auto dense_b,
const auto local_beta, auto dense_x) {
const auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
}
},
alpha, b, beta, x);
}
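With both apply overloads now routed through distributed::precision_dispatch_real_complex, the distributed matrix accepts vectors of the other precision (float vs. double) and, for real-valued matrices, complex vectors applied as real views, mirroring the non-distributed precision dispatch. A hedged sketch of a call this enables (creation arguments abbreviated; types and setup are illustrative, not taken from this diff):

    // illustrative only: mixed-precision apply through the new dispatch
    auto A = gko::distributed::Matrix<double, gko::int32, gko::int64>::create(exec, comm);
    auto b = gko::distributed::Vector<float>::create(exec, comm);
    auto x = gko::distributed::Vector<float>::create(exec, comm);
    // ... read A and b, size x accordingly ...
    A->apply(b.get(), x.get());  // b and x are converted around the local applies
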


@@ -394,7 +400,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
gather_idxs_ = other.gather_idxs_;
send_offsets_ = other.send_offsets_;
recv_offsets_ = other.recv_offsets_;
recv_sizes_ = other.recv_sizes_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
@@ -419,7 +424,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
gather_idxs_ = std::move(other.gather_idxs_);
send_offsets_ = std::move(other.send_offsets_);
recv_offsets_ = std::move(other.recv_offsets_);
recv_sizes_ = std::move(other.recv_sizes_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);