Adds distributed support for several solvers #976

Merged

merged 40 commits on Sep 28, 2022
Changes from 29 commits
Commits (40)
6b57030
fix cmake after rebase
MarcelKoch Aug 16, 2022
0145f14
adds non const real_view to distributed vector
MarcelKoch Apr 21, 2022
c0f9289
adds tmp array to compute_squared_norm2
MarcelKoch Apr 21, 2022
33df1cf
adds reduction with tmp array to distributed vector
MarcelKoch Apr 21, 2022
524fc39
adds dispatch for distributed vector
MarcelKoch Feb 23, 2022
cd729de
adds helper functions to access local data of dense/dist::vector
MarcelKoch Feb 23, 2022
7784dc4
adds create_with_config_of and get_stride to distributed vector
MarcelKoch Jul 11, 2022
9885d58
adds distributed capabilities to some solvers
MarcelKoch Feb 23, 2022
55e7b40
add distributed dispatch to residual norm criteria
MarcelKoch Feb 23, 2022
38ccce9
adds distributed solver example
MarcelKoch Feb 23, 2022
f5e1e56
small rename
MarcelKoch Feb 24, 2022
0ff3f0f
add distributed dispatch to identity
MarcelKoch Feb 24, 2022
de54da6
add generic distributed solver tests
MarcelKoch Feb 24, 2022
b9351a5
fixes residual norm dispatch
MarcelKoch Feb 24, 2022
370ca74
adds mixed + complex apply to solver tests
MarcelKoch Feb 25, 2022
bc05ee6
adds complex-to-real dispatch for distributed
MarcelKoch Feb 25, 2022
8f5e6c7
fixes non-mpi residual norm dispatch
MarcelKoch Mar 1, 2022
822646c
adds precision dispatch to distributed matrix apply
MarcelKoch Apr 22, 2022
b3f99fb
fix formatting
MarcelKoch Apr 25, 2022
d6ad6c5
review updates
MarcelKoch May 5, 2022
c8d5b2f
Format files
ginkgo-bot May 5, 2022
549cf9d
review updates
MarcelKoch May 5, 2022
4c4ec62
review updates
MarcelKoch May 9, 2022
c134687
add i_send/i_recv with datatypes
MarcelKoch May 11, 2022
4182437
use template vector type for Idr iterate
MarcelKoch May 5, 2022
1c114ff
use device allocation mode and disable device reset for distributed t…
MarcelKoch May 23, 2022
8c3add8
Format files
ginkgo-bot Jun 14, 2022
8c3e791
Format files
ginkgo-bot Jul 12, 2022
a6d00b7
fixes matrix's copy and move assignment
MarcelKoch Aug 24, 2022
1d8d148
adds distributed example kind
MarcelKoch Aug 26, 2022
af1e7ca
removes template apply_impl of Bicg
MarcelKoch Aug 26, 2022
c05f6a1
review updates:
MarcelKoch Aug 26, 2022
0f58c62
Format files
ginkgo-bot Aug 26, 2022
f504af1
fixes residual_norm precision dispatch for non-mpi
MarcelKoch Sep 19, 2022
6dc6667
adds test with different partition types
MarcelKoch Sep 19, 2022
33fd976
removes special case if no non-local matrix
MarcelKoch Sep 20, 2022
be0983f
frees mpi request and makes it move-only
MarcelKoch Sep 20, 2022
06f9221
review updates:
MarcelKoch Sep 26, 2022
cae4b88
Format files
ginkgo-bot Sep 26, 2022
8113be4
Merge branch 'distributed-develop' into distributed-solvers
MarcelKoch Sep 27, 2022
18 changes: 15 additions & 3 deletions cmake/create_test.cmake
@@ -9,6 +9,15 @@ function(ginkgo_build_test_name test_name target_name)
set(${target_name} ${TEST_TARGET_NAME} PARENT_SCOPE)
endfunction(ginkgo_build_test_name)

function(ginkgo_create_gtest_mpi_main)
add_library(gtest_mpi_main "")
target_sources(gtest_mpi_main
PRIVATE
${PROJECT_SOURCE_DIR}/core/test/mpi/gtest/mpi_listener.cpp)
find_package(MPI REQUIRED)
target_link_libraries(gtest_mpi_main PRIVATE GTest::GTest MPI::MPI_CXX)
endfunction(ginkgo_create_gtest_mpi_main)

## Set up shared target properties and handle ADDITIONAL_LIBRARIES/ADDITIONAL_INCLUDES
## `MPI_SIZE size` causes the tests to be run with `size` MPI processes.
function(ginkgo_set_test_target_properties test_target_name)
@@ -23,6 +32,9 @@ function(ginkgo_set_test_target_properties test_target_name)
target_link_libraries(${test_target_name} PRIVATE "${GINKGO_CIRCULAR_DEPS_FLAGS}")
endif()
if (set_properties_MPI_SIZE)
if(NOT TARGET gtest_mpi_main)
ginkgo_create_gtest_mpi_main()
endif()
set(gtest_main gtest_mpi_main MPI::MPI_CXX)
else()
set(gtest_main GTest::Main)
@@ -80,7 +92,7 @@ function(ginkgo_create_dpcpp_test test_name)
target_compile_features(${test_target_name} PUBLIC cxx_std_17)
target_compile_options(${test_target_name} PRIVATE ${GINKGO_DPCPP_FLAGS})
target_link_options(${test_target_name} PRIVATE -fsycl-device-code-split=per_kernel)
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
# Note: MKL_ENV is empty on linux. Maybe need to apply MKL_ENV to all test.
if (MKL_ENV)
@@ -115,7 +127,7 @@ function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.18)
set_target_properties(${test_target_name} PROPERTIES CUDA_ARCHITECTURES OFF)
endif()
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_cuda_test_internal)

@@ -205,7 +217,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec)
target_compile_definitions(${test_target_name} PRIVATE GINKGO_COMMON_SINGLE_MODE=1)
target_compile_definitions(${test_target_name} PRIVATE GINKGO_DPCPP_SINGLE_MODE=1)
endif()
ginkgo_internal_add_test(${test_target_name} ${ARGN})
ginkgo_set_test_target_properties(${test_target_name} ${ARGN})
ginkgo_add_test(${test_name}_${exec} ${test_target_name} ${ARGN})
endfunction(ginkgo_create_common_test_internal)

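Note on usage: the new gtest_mpi_main target is only created on demand, the first time a test passes MPI_SIZE. A minimal sketch of how a test might opt in, assuming the usual ginkgo_create_test entry point (the test name and rank count below are illustrative, not part of this diff):

    # hypothetical test registration: MPI_SIZE makes
    # ginkgo_set_test_target_properties link gtest_mpi_main and MPI::MPI_CXX
    # instead of GTest::Main, and the test is run with 4 MPI processes
    ginkgo_create_test(vector_assembly MPI_SIZE 4)
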
7 changes: 4 additions & 3 deletions common/unified/matrix/dense_kernels.cpp
@@ -383,13 +383,14 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
template <typename ValueType>
void compute_squared_norm2(std::shared_ptr<const DefaultExecutor> exec,
const matrix::Dense<ValueType>* x,
matrix::Dense<remove_complex<ValueType>>* result)
matrix::Dense<remove_complex<ValueType>>* result,
array<char>& tmp)
{
run_kernel_col_reduction(
run_kernel_col_reduction_cached(
exec,
[] GKO_KERNEL(auto i, auto j, auto x) { return squared_norm(x(i, j)); },
GKO_KERNEL_REDUCE_SUM(remove_complex<ValueType>), result->get_values(),
x->get_size(), x);
x->get_size(), tmp, x);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(
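The extra array<char>& tmp parameter and the switch to run_kernel_col_reduction_cached let the caller own the scratch memory used by the column reduction, so repeated norm computations can reuse a single allocation. A hedged sketch of the intended call pattern, assuming a compute_norm2 overload that forwards such a workspace (the loop and names are illustrative, not taken from this diff):

    // hypothetical reuse of one scratch buffer across solver iterations
    gko::array<char> workspace{exec};
    for (int it = 0; it < max_iters; ++it) {
        // ... update the dense vector x ...
        x->compute_norm2(norm.get(), workspace);  // reuses workspace instead of reallocating
    }
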
101 changes: 101 additions & 0 deletions core/distributed/helpers.hpp
@@ -0,0 +1,101 @@
/*******************************<GINKGO LICENSE>******************************
Copyright (c) 2017-2022, the Ginkgo authors
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.

3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
******************************<GINKGO LICENSE>*******************************/

#include <memory>


#include <ginkgo/config.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/dense.hpp>


namespace gko {
namespace detail {


template <typename ValueType>
std::unique_ptr<matrix::Dense<ValueType>> create_with_config_of(
const matrix::Dense<ValueType>* mtx)
{
return matrix::Dense<ValueType>::create(mtx->get_executor(),
mtx->get_size(), mtx->get_stride());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(const matrix::Dense<ValueType>* mtx)
{
return mtx;
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(matrix::Dense<ValueType>* mtx)
{
return mtx;
}


#if GINKGO_BUILD_MPI


template <typename ValueType>
std::unique_ptr<distributed::Vector<ValueType>> create_with_config_of(
const distributed::Vector<ValueType>* mtx)
{
return distributed::Vector<ValueType>::create(
mtx->get_executor(), mtx->get_communicator(), mtx->get_size(),
mtx->get_local_vector()->get_size(),
mtx->get_local_vector()->get_stride());
}


template <typename ValueType>
matrix::Dense<ValueType>* get_local(distributed::Vector<ValueType>* mtx)
{
return const_cast<matrix::Dense<ValueType>*>(mtx->get_local_vector());
}


template <typename ValueType>
const matrix::Dense<ValueType>* get_local(
const distributed::Vector<ValueType>* mtx)
{
return mtx->get_local_vector();
}


#endif


} // namespace detail
} // namespace gko
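A hedged illustration of why these overloads exist: code templated on the vector type can create workspace vectors and reach the rank-local data uniformly, whether it receives a gko::matrix::Dense (non-distributed case) or a gko::distributed::Vector. The helper below is invented for illustration only:

    // illustrative only, not part of the diff
    template <typename VectorType>
    void prepare_workspace(const VectorType* b)
    {
        // same size (and local size/stride) as b, for Dense and distributed::Vector alike
        auto workspace = gko::detail::create_with_config_of(b);
        // rank-local values as a plain Dense view, usable by shared-memory kernels
        auto local_b = gko::detail::get_local(b);
        // ... run local kernels on local_b, writing into get_local(workspace.get()) ...
    }
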
112 changes: 58 additions & 54 deletions core/distributed/matrix.cpp
@@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/distributed/matrix.hpp>


#include <ginkgo/core/base/precision_dispatch.hpp>
#include <ginkgo/core/distributed/vector.hpp>
#include <ginkgo/core/matrix/csr.hpp>

@@ -297,65 +298,70 @@ template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* b, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto dense_b, auto dense_x) {
auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(one_scalar_.get(), recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(dense_b->get_local_vector(), local_x.get());
}
},
b, x);
}


template <typename ValueType, typename LocalIndexType, typename GlobalIndexType>
void Matrix<ValueType, LocalIndexType, GlobalIndexType>::apply_impl(
const LinOp* alpha, const LinOp* b, const LinOp* beta, LinOp* x) const
{
auto dense_b = as<global_vector_type>(b);
auto dense_x = as<global_vector_type>(x);
const auto x_exec = x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec, dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
auto local_alpha = as<local_vector_type>(alpha);
auto local_beta = as<local_vector_type>(beta);
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(), local_beta,
local_x.get());
}
distributed::precision_dispatch_real_complex<ValueType>(
[this](const auto local_alpha, const auto dense_b,
const auto local_beta, auto dense_x) {
const auto x_exec = dense_x->get_executor();
auto local_x = gko::matrix::Dense<ValueType>::create(
x_exec, dense_x->get_local_vector()->get_size(),
gko::make_array_view(
x_exec,
dense_x->get_local_vector()->get_num_stored_elements(),
dense_x->get_local_values()),
dense_x->get_local_vector()->get_stride());
if (this->get_non_local_matrix()->get_size()) {
auto req = this->communicate(dense_b->get_local_vector());
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
req.wait();
auto exec = this->get_executor();
auto use_host_buffer =
exec->get_master() != exec && !gko::mpi::is_gpu_aware();
if (use_host_buffer) {
recv_buffer_->copy_from(host_recv_buffer_.get());
}
non_local_mtx_->apply(local_alpha, recv_buffer_.get(),
one_scalar_.get(), local_x.get());
} else {
local_mtx_->apply(local_alpha, dense_b->get_local_vector(),
local_beta, local_x.get());
}
},
alpha, b, beta, x);
}
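With both apply overloads now routed through distributed::precision_dispatch_real_complex, the distributed matrix accepts vectors of the other precision (float vs. double) and, for real-valued matrices, complex vectors applied as real views, mirroring the non-distributed precision dispatch. A hedged sketch of a call this enables (creation arguments abbreviated; types and setup are illustrative, not taken from this diff):

    // illustrative only: mixed-precision apply through the new dispatch
    auto A = gko::distributed::Matrix<double, gko::int32, gko::int64>::create(exec, comm);
    auto b = gko::distributed::Vector<float>::create(exec, comm);
    auto x = gko::distributed::Vector<float>::create(exec, comm);
    // ... read A and b, size x accordingly ...
    A->apply(b.get(), x.get());  // b and x are converted around the local applies
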


@@ -394,7 +400,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(
gather_idxs_ = other.gather_idxs_;
send_offsets_ = other.send_offsets_;
recv_offsets_ = other.recv_offsets_;
recv_sizes_ = other.recv_sizes_;
send_sizes_ = other.send_sizes_;
recv_sizes_ = other.recv_sizes_;
non_local_to_global_ = other.non_local_to_global_;
@@ -419,7 +424,6 @@ Matrix<ValueType, LocalIndexType, GlobalIndexType>::operator=(Matrix&& other)
gather_idxs_ = std::move(other.gather_idxs_);
send_offsets_ = std::move(other.send_offsets_);
recv_offsets_ = std::move(other.recv_offsets_);
recv_sizes_ = std::move(other.recv_sizes_);
send_sizes_ = std::move(other.send_sizes_);
recv_sizes_ = std::move(other.recv_sizes_);
non_local_to_global_ = std::move(other.non_local_to_global_);