From 0521071ec5e4f92cc4ccb203b5dc3aa8fdd1c457 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 23 Nov 2021 17:43:20 +0100 Subject: [PATCH 1/2] Small fix for unified kernel reduction on CUDA --- cuda/base/kernel_launch_reduction.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda/base/kernel_launch_reduction.cuh b/cuda/base/kernel_launch_reduction.cuh index d1d6285e839..98c60d3c4d4 100644 --- a/cuda/base/kernel_launch_reduction.cuh +++ b/cuda/base/kernel_launch_reduction.cuh @@ -458,7 +458,7 @@ void run_kernel_row_reduction(std::shared_ptr exec, } else { select_run_generic_kernel_row_reduction( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [=](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, @@ -488,7 +488,7 @@ void run_kernel_col_reduction(std::shared_ptr exec, if (cols <= config::warp_size) { select_generic_col_reduction_small( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [=](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, From 2f2d416c37cd841184e3cfa376cfa8e7c26ea134 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 23 Nov 2021 17:58:57 +0100 Subject: [PATCH 2/2] Review update. 
--- cuda/base/kernel_launch_reduction.cuh | 4 ++-- dpcpp/base/kernel_launch_reduction.dp.hpp | 4 ++-- hip/base/kernel_launch_reduction.hip.hpp | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cuda/base/kernel_launch_reduction.cuh b/cuda/base/kernel_launch_reduction.cuh index 98c60d3c4d4..c70e5564503 100644 --- a/cuda/base/kernel_launch_reduction.cuh +++ b/cuda/base/kernel_launch_reduction.cuh @@ -458,7 +458,7 @@ void run_kernel_row_reduction(std::shared_ptr exec, } else { select_run_generic_kernel_row_reduction( subwarp_sizes(), - [=](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, @@ -488,7 +488,7 @@ void run_kernel_col_reduction(std::shared_ptr exec, if (cols <= config::warp_size) { select_generic_col_reduction_small( subwarp_sizes(), - [=](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, diff --git a/dpcpp/base/kernel_launch_reduction.dp.hpp b/dpcpp/base/kernel_launch_reduction.dp.hpp index 5ebf06b0f71..47d97676bb5 100644 --- a/dpcpp/base/kernel_launch_reduction.dp.hpp +++ b/dpcpp/base/kernel_launch_reduction.dp.hpp @@ -576,7 +576,7 @@ void run_kernel_row_reduction_stage1(std::shared_ptr exec, } else { select_generic_kernel_row_reduction_2d( subsubgroup_sizes(), - [&](int compiled_ssg_size) { + [cols](int compiled_ssg_size) { return compiled_ssg_size >= cols || compiled_ssg_size == sg_size; }, @@ -612,7 +612,7 @@ void run_kernel_col_reduction_stage1(std::shared_ptr exec, if (cols <= sg_size) { select_generic_col_reduction_small( subsubgroup_sizes(), - [&](int compiled_ssg_size) { + [cols](int compiled_ssg_size) { return compiled_ssg_size >= cols || compiled_ssg_size == sg_size; }, diff --git a/hip/base/kernel_launch_reduction.hip.hpp b/hip/base/kernel_launch_reduction.hip.hpp index 610f89673a9..fa20000d5bb 100644 --- 
a/hip/base/kernel_launch_reduction.hip.hpp +++ b/hip/base/kernel_launch_reduction.hip.hpp @@ -466,7 +466,7 @@ void run_kernel_row_reduction(std::shared_ptr exec, } else { select_run_generic_kernel_row_reduction( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; }, @@ -496,7 +496,7 @@ void run_kernel_col_reduction(std::shared_ptr exec, if (cols <= config::warp_size) { select_generic_col_reduction_small( subwarp_sizes(), - [&](int compiled_subwarp_size) { + [cols](int compiled_subwarp_size) { return compiled_subwarp_size >= cols || compiled_subwarp_size == config::warp_size; },