Skip to content

Commit

Permalink
Merge simplification of GMRES kernels
Browse files Browse the repository at this point in the history
This PR splits the step1 and initialize2 kernels into individual
reductions (norm and dot) and axpy/scale operations, which allows us
to use the simple kernel setup for all of GMRES as well. This will also
simplify adding CGS-Arnoldi to plain GMRES (and, later on, to
distributed GMRES).


Related PR: #861
  • Loading branch information
Thomas Grützmacher authored Nov 3, 2022
2 parents 87859a4 + 2e70a8b commit 3902b86
Show file tree
Hide file tree
Showing 35 changed files with 1,586 additions and 2,767 deletions.
2 changes: 2 additions & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ set(UNIFIED_SOURCES
solver/bicgstab_kernels.cpp
solver/cg_kernels.cpp
solver/cgs_kernels.cpp
solver/common_gmres_kernels.cpp
solver/fcg_kernels.cpp
solver/gmres_kernels.cpp
solver/ir_kernels.cpp
)
list(TRANSFORM UNIFIED_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/unified/)
Expand Down
4 changes: 2 additions & 2 deletions common/cuda_hip/solver/cb_gmres_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ __global__ __launch_bounds__(default_block_size) void zero_matrix_kernel(

// Must be called with at least `num_rows * stride_krylov` threads in total.
template <size_type block_size, typename ValueType, typename Accessor3d>
__global__ __launch_bounds__(block_size) void initialize_2_1_kernel(
__global__ __launch_bounds__(block_size) void restart_1_kernel(
size_type num_rows, size_type num_rhs, size_type krylov_dim,
Accessor3d krylov_bases, ValueType* __restrict__ residual_norm_collection,
size_type stride_residual_nc)
Expand Down Expand Up @@ -82,7 +82,7 @@ __global__ __launch_bounds__(block_size) void initialize_2_1_kernel(

// Must be called with at least `num_rows * num_rhs` threads in total.
template <size_type block_size, typename ValueType, typename Accessor3d>
__global__ __launch_bounds__(block_size) void initialize_2_2_kernel(
__global__ __launch_bounds__(block_size) void restart_2_kernel(
size_type num_rows, size_type num_rhs,
const ValueType* __restrict__ residual, size_type stride_residual,
const remove_complex<ValueType>* __restrict__ residual_norm,
Expand Down
2 changes: 1 addition & 1 deletion common/cuda_hip/solver/common_gmres_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Must be called with at least `max(stride_b * num_rows, krylov_dim *
// num_cols)` threads in total.
template <size_type block_size, typename ValueType>
__global__ __launch_bounds__(block_size) void initialize_1_kernel(
__global__ __launch_bounds__(block_size) void initialize_kernel(
size_type num_rows, size_type num_cols, size_type krylov_dim,
const ValueType* __restrict__ b, size_type stride_b,
ValueType* __restrict__ residual, size_type stride_residual,
Expand Down
240 changes: 0 additions & 240 deletions common/cuda_hip/solver/gmres_kernels.hpp.inc

This file was deleted.

Loading

0 comments on commit 3902b86

Please sign in to comment.