Skip to content

Commit

Permalink
Merge: Fix for unified reduction kernel
Browse files Browse the repository at this point in the history
Small fix for the unified reduction kernel: change the selection lambdas to capture `cols` by value instead of by reference.

Issue first found on Summit with gcc-7.5.0 and cuda-10.1, 11.3 and 11.4.

Related PR: #926
  • Loading branch information
pratikvn authored Nov 24, 2021
2 parents cf69abf + 2f2d416 commit 00226d0
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 6 deletions.
4 changes: 2 additions & 2 deletions cuda/base/kernel_launch_reduction.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ void run_kernel_row_reduction(std::shared_ptr<const CudaExecutor> exec,
} else {
select_run_generic_kernel_row_reduction(
subwarp_sizes(),
[&](int compiled_subwarp_size) {
[cols](int compiled_subwarp_size) {
return compiled_subwarp_size >= cols ||
compiled_subwarp_size == config::warp_size;
},
Expand Down Expand Up @@ -488,7 +488,7 @@ void run_kernel_col_reduction(std::shared_ptr<const CudaExecutor> exec,
if (cols <= config::warp_size) {
select_generic_col_reduction_small(
subwarp_sizes(),
[&](int compiled_subwarp_size) {
[cols](int compiled_subwarp_size) {
return compiled_subwarp_size >= cols ||
compiled_subwarp_size == config::warp_size;
},
Expand Down
4 changes: 2 additions & 2 deletions dpcpp/base/kernel_launch_reduction.dp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ void run_kernel_row_reduction_stage1(std::shared_ptr<const DpcppExecutor> exec,
} else {
select_generic_kernel_row_reduction_2d(
subsubgroup_sizes(),
[&](int compiled_ssg_size) {
[cols](int compiled_ssg_size) {
return compiled_ssg_size >= cols ||
compiled_ssg_size == sg_size;
},
Expand Down Expand Up @@ -612,7 +612,7 @@ void run_kernel_col_reduction_stage1(std::shared_ptr<const DpcppExecutor> exec,
if (cols <= sg_size) {
select_generic_col_reduction_small(
subsubgroup_sizes(),
[&](int compiled_ssg_size) {
[cols](int compiled_ssg_size) {
return compiled_ssg_size >= cols ||
compiled_ssg_size == sg_size;
},
Expand Down
4 changes: 2 additions & 2 deletions hip/base/kernel_launch_reduction.hip.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ void run_kernel_row_reduction(std::shared_ptr<const HipExecutor> exec,
} else {
select_run_generic_kernel_row_reduction(
subwarp_sizes(),
[&](int compiled_subwarp_size) {
[cols](int compiled_subwarp_size) {
return compiled_subwarp_size >= cols ||
compiled_subwarp_size == config::warp_size;
},
Expand Down Expand Up @@ -496,7 +496,7 @@ void run_kernel_col_reduction(std::shared_ptr<const HipExecutor> exec,
if (cols <= config::warp_size) {
select_generic_col_reduction_small(
subwarp_sizes(),
[&](int compiled_subwarp_size) {
[cols](int compiled_subwarp_size) {
return compiled_subwarp_size >= cols ||
compiled_subwarp_size == config::warp_size;
},
Expand Down

0 comments on commit 00226d0

Please sign in to comment.