From 829ff9214778a5d4ebeba8433614e9e409618f38 Mon Sep 17 00:00:00 2001 From: Ian Li Date: Mon, 10 Jun 2024 07:50:12 -0700 Subject: [PATCH] [SYCL] Change check_device_code HIP tests to use SYCL_EXTERNAL (#13990) Changed the HIP sycl/test/check_device_code lit tests to use SYCL_EXTERNAL functions instead of writing entire programs. --- .../hip/atomic/amdgpu_unsafe_atomics.cpp | 74 +++++------- .../matrix/matrix-hip-bfloat16-float-test.cpp | 111 ++++++++--------- .../matrix/matrix-hip-double-double-test.cpp | 68 ++++------- .../hip/matrix/matrix-hip-half-float-test.cpp | 113 ++++++++---------- .../hip/matrix/matrix-hip-int8-int32-test.cpp | 113 ++++++++---------- 5 files changed, 217 insertions(+), 262 deletions(-) diff --git a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp index 0b78738a01514..b045fdb99c8d5 100644 --- a/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp +++ b/sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp @@ -5,45 +5,37 @@ #include -class intKernel; -class fpKernel; - -int main() { - int *i; - float *f; - double *d; - sycl::queue{}.single_task([=] { - sycl::atomic_ref - atomicInt(*i); - atomicInt.fetch_xor(1); - atomicInt.fetch_and(1); - atomicInt.fetch_or(1); - // CHECK: amdgpu_kernel void{{.*}}intKernel - // CHECK-SAFE: cmpxchg volatile - // CHECK-SAFE-NOT: atomicrmw - // CHECK-UNSAFE: atomicrmw volatile xor - // CHECK-UNSAFE: atomicrmw volatile and - // CHECK-UNSAFE: atomicrmw volatile or - // CHECK-UNSAFE-NOT: cmpxchg - }); - sycl::queue{}.single_task([=] { - sycl::atomic_ref(*f) - .fetch_add(1.0f); - // CHECK: amdgpu_kernel void{{.*}}fpKernel - // CHECK-SAFE: atomicrmw volatile fadd - // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f32 - // CHECK-UNSAFE-FP: llvm.amdgcn.global.atomic.fadd.f32 - // CHECK-UNSAFE-FP-NOT: atomicrmw volatile fadd - sycl::atomic_ref(*d) - .fetch_add(1.0); - // CHECK-SAFE: cmpxchg - // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f64 - // CHECK-UNSAFE-FP: llvm.amdgcn.global.atomic.fadd.f64 - // CHECK-UNSAFE-FP-NOT: cmpxchg - // CHECK: __CLANG_OFFLOAD_BUNDLE____END__ sycl-amdgcn-amd-amdhsa- - }); +SYCL_EXTERNAL void intAtomicFunc(int *i) { + sycl::atomic_ref + atomicInt(*i); + atomicInt.fetch_xor(1); + atomicInt.fetch_and(1); + atomicInt.fetch_or(1); + // CHECK: void{{.*}}intAtomicFunc + // CHECK-SAFE: cmpxchg volatile + // CHECK-SAFE-NOT: atomicrmw + // CHECK-UNSAFE: atomicrmw volatile xor + // CHECK-UNSAFE: atomicrmw volatile and + // CHECK-UNSAFE: atomicrmw volatile or + // CHECK-UNSAFE-NOT: cmpxchg } + +SYCL_EXTERNAL void fpAtomicFunc(float *f, double *d) { + sycl::atomic_ref(*f) + .fetch_add(1.0f); + // CHECK: void{{.*}}fpAtomicFunc + // CHECK-SAFE: atomicrmw volatile fadd + // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f32 + // CHECK-UNSAFE-FP: llvm.amdgcn.global.atomic.fadd.f32 + // CHECK-UNSAFE-FP-NOT: atomicrmw volatile fadd + sycl::atomic_ref(*d) + .fetch_add(1.0); + // CHECK-SAFE: cmpxchg + // CHECK-SAFE-NOT: llvm.amdgcn.global.atomic.fadd.f64 + // CHECK-UNSAFE-FP: llvm.amdgcn.global.atomic.fadd.f64 + // CHECK-UNSAFE-FP-NOT: cmpxchg + // CHECK: __CLANG_OFFLOAD_BUNDLE____END__ sycl-amdgcn-amd-amdhsa- +} \ No newline at end of file diff --git a/sycl/test/check_device_code/hip/matrix/matrix-hip-bfloat16-float-test.cpp b/sycl/test/check_device_code/hip/matrix/matrix-hip-bfloat16-float-test.cpp index 29843ac50f114..2fabef77ee86a 100644 --- a/sycl/test/check_device_code/hip/matrix/matrix-hip-bfloat16-float-test.cpp +++ b/sycl/test/check_device_code/hip/matrix/matrix-hip-bfloat16-float-test.cpp @@ -7,62 +7,55 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; using sycl::ext::oneapi::bfloat16; -int main() { - buffer bufA(nullptr, range<1>(1)); - buffer bufB(nullptr, range<1>(1)); - buffer bufC(nullptr, range<1>(1)); - buffer bufD(nullptr, range<1>(1)); - queue q; - - q.submit([&](handler &cgh) { - sycl::accessor - accA(bufA, cgh); - sycl::accessor - accB(bufB, cgh); - sycl::accessor - accC(bufC, cgh); - sycl::accessor - accD(bufD, cgh); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - // CHECK: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, <4 x float> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 16, layout::row_major); - }); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 32, layout::row_major); - }); - }); - - return 0; -}; +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_row_m16n16k16(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + // CHECK: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, <4 x float> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 16, + layout::row_major); +} + +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_col_m32n32k8(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> zeroinitializer, <4 x i16> zeroinitializer, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 32, + layout::row_major); +} \ No newline at end of file diff --git a/sycl/test/check_device_code/hip/matrix/matrix-hip-double-double-test.cpp b/sycl/test/check_device_code/hip/matrix/matrix-hip-double-double-test.cpp index e82e6fd0337db..9c0b16ec82a41 100644 --- a/sycl/test/check_device_code/hip/matrix/matrix-hip-double-double-test.cpp +++ b/sycl/test/check_device_code/hip/matrix/matrix-hip-double-double-test.cpp @@ -6,45 +6,29 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -int main() { - buffer bufA(nullptr, range<1>(1)); - buffer bufB(nullptr, range<1>(1)); - buffer bufC(nullptr, range<1>(1)); - buffer bufD(nullptr, range<1>(1)); - queue q; - - q.submit([&](handler &cgh) { - sycl::accessor - accA(bufA, cgh); - sycl::accessor - accB(bufB, cgh); - sycl::accessor - accC(bufC, cgh); - sycl::accessor - accD(bufD, cgh); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double {{.*}}, double {{.*}}, <4 x double> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 16, layout::row_major); - }); - }); - - return 0; -}; +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_row_m16n16k4(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double {{.*}}, double {{.*}}, <4 x double> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 16, + layout::row_major); +} \ No newline at end of file diff --git a/sycl/test/check_device_code/hip/matrix/matrix-hip-half-float-test.cpp b/sycl/test/check_device_code/hip/matrix/matrix-hip-half-float-test.cpp index 2afe666034bf5..7e5ff71b9b5d9 100644 --- a/sycl/test/check_device_code/hip/matrix/matrix-hip-half-float-test.cpp +++ b/sycl/test/check_device_code/hip/matrix/matrix-hip-half-float-test.cpp @@ -6,63 +6,56 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -int main() { - buffer bufA(nullptr, range<1>(1)); - buffer bufB(nullptr, range<1>(1)); - buffer bufC(nullptr, range<1>(1)); - buffer bufD(nullptr, range<1>(1)); - queue q; - - q.submit([&](handler &cgh) { - sycl::accessor - accA(bufA, cgh); - sycl::accessor - accB(bufB, cgh); - sycl::accessor - accC(bufC, cgh); - sycl::accessor - accD(bufD, cgh); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, <4 x float> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 16, layout::row_major); - }); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 32, layout::row_major); - }); - }); - - return 0; -}; +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_row_m16n16k16(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, <4 x float> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 16, + layout::row_major); +} + +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_col_m32n32k8(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> zeroinitializer, <4 x half> zeroinitializer, <16 x float> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 32, + layout::row_major); +} \ No newline at end of file diff --git a/sycl/test/check_device_code/hip/matrix/matrix-hip-int8-int32-test.cpp b/sycl/test/check_device_code/hip/matrix/matrix-hip-int8-int32-test.cpp index d39f7a8772717..98c74f54be794 100644 --- a/sycl/test/check_device_code/hip/matrix/matrix-hip-int8-int32-test.cpp +++ b/sycl/test/check_device_code/hip/matrix/matrix-hip-int8-int32-test.cpp @@ -6,63 +6,56 @@ using namespace sycl; using namespace sycl::ext::oneapi::experimental::matrix; -int main() { - buffer bufA(nullptr, range<1>(1)); - buffer bufB(nullptr, range<1>(1)); - buffer bufC(nullptr, range<1>(1)); - buffer bufD(nullptr, range<1>(1)); - queue q; - - q.submit([&](handler &cgh) { - sycl::accessor - accA(bufA, cgh); - sycl::accessor - accB(bufB, cgh); - sycl::accessor - accC(bufC, cgh); - sycl::accessor - accD(bufD, cgh); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 {{.*}}, i32 {{.*}}, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 16, layout::row_major); - }); - - cgh.parallel_for( - nd_range<2>({1, 64}, {1, 64}), - [=](nd_item<2> item) [[sycl::reqd_work_group_size(1, 1, 64)]] { - sycl::sub_group sg = item.get_sub_group(); - - joint_matrix sub_c{}; - joint_matrix - sub_a{}; - joint_matrix - sub_b{}; - - // CHECK: tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 {{.*}}, i32 {{.*}}, <16 x i32> zeroinitializer, i32 0, i32 0, i32 0) - joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); - joint_matrix_store( - sg, sub_c, accD.template get_multi_ptr(), - 32, layout::row_major); - }); - }); - - return 0; -}; +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_row_m16n16k16(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 {{.*}}, i32 {{.*}}, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 16, + layout::row_major); +} + +SYCL_EXTERNAL [[sycl::reqd_work_group_size(1, 1, 64)]] void +row_col_m32n32k8(sycl::accessor + accA, + sycl::accessor + accB, + sycl::accessor + accC, + sycl::accessor + accD, + nd_item<2> item) { + sycl::sub_group sg = item.get_sub_group(); + + joint_matrix sub_c{}; + joint_matrix sub_a{}; + joint_matrix sub_b{}; + + // CHECK: tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 {{.*}}, i32 {{.*}}, <16 x i32> zeroinitializer, i32 0, i32 0, i32 0) + joint_matrix_mad(sg, sub_c, sub_a, sub_b, sub_c); + joint_matrix_store(sg, sub_c, + accD.template get_multi_ptr(), 32, + layout::row_major); +} \ No newline at end of file