Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable mixed CUDA/ROCm benchmarks #911

Merged
merged 2 commits into from
Nov 4, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions BENCHMARKING.md
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,12 @@ The supported environment variables are described in the following list:
* `PRECONDS={jacobi,ic,ilu,paric,parict,parilu,parilut,ic-isai,ilu-isai,paric-isai,parict-isai,parilu-isai,parilut-isai,none}`
the preconditioners to use for either `solver` or `preconditioner` benchmarks.
Multiple options can be passed to this variable. Default is `none`.
* `FORMATS={csr,coo,ell,hybrid,sellp,hybridxx,cusp_xx,hipsp_xx}` the matrix
* `FORMATS={csr,coo,ell,hybrid,sellp,hybridxx,cusparse_xx,hipsparse_xx}` the matrix
formats to benchmark for the `spmv` phase of the benchmark. Run
`${ginkgo_build_dir}/benchmark/spmv/spmv --help` for a full list. If needed,
multiple options for hybrid with different optimization parameters are
available. Depending on the libraries available at build time, vendor
library formats (cuSPARSE with `cusp_` prefix or hipSPARSE with `hipsp_`
library formats (cuSPARSE with `cusparse_` prefix or hipSPARSE with `hipsparse_`
prefix) can be used as well. Multiple options can be passed. The default is
`csr,coo,ell,hybrid,sellp`.
* `SOLVERS={bicgstab,bicg,cg,cgs,fcg,gmres,cb_gmres_{keep,reduce1,reduce2,integer,ireduce1,ireduce2},lower_trs,upper_trs}`
Expand Down
87 changes: 56 additions & 31 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,37 +4,28 @@ if (NOT CMAKE_BUILD_TYPE STREQUAL "Release")
"will be affected")
endif()

# Abort configuration when CUDA and HIP are both enabled and GINKGO_HIP_PLATFORM
# matches HIP_PLATFORM_AMD_REGEX. The error text names the two ways out:
# turn the benchmark build off, or select the NVIDIA HIP platform through the
# HIP_PLATFORM environment variable.
if (GINKGO_BUILD_CUDA AND GINKGO_BUILD_HIP AND
GINKGO_HIP_PLATFORM MATCHES "${HIP_PLATFORM_AMD_REGEX}")
message(FATAL_ERROR "Building the benchmarks for both HIP AMD and CUDA "
"at the same time is currently not supported. "
"Disable the benchmark build using `-DGINKGO_BUILD_BENCHMARKS=OFF` "
"or use `export HIP_PLATFORM=nvcc` (ROCM <=4.0) or "
"`export HIP_PLATFORM=nvidia` (ROCM >= 4.1) in your build environment instead.")
endif()

# Adds utils/tuning_variables.cpp to a benchmark target when tuning is enabled.
#
# \param name  target that receives the extra PRIVATE source file; the target
#              is left untouched unless GINKGO_BENCHMARK_ENABLE_TUNING is set.
#              The file is looked up one level above the CMAKE_CURRENT_SOURCE_DIR
#              that is in effect when the function is called.
function(ginkgo_benchmark_add_tuning_maybe name)
    if(NOT GINKGO_BENCHMARK_ENABLE_TUNING)
        return()
    endif()
    target_sources(${name}
        PRIVATE
            ${CMAKE_CURRENT_SOURCE_DIR}/../utils/tuning_variables.cpp)
endfunction()

function(ginkgo_benchmark_cusp_linops name)
target_compile_definitions("${name}" PRIVATE HAS_CUDA=1)
target_link_libraries("${name}" ginkgo ${CUDA_RUNTIME_LIBS}
${CUBLAS} ${CUSPARSE})
target_include_directories("${name}" SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS})
if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2")
target_compile_definitions("${name}" PRIVATE ALLOWMP=1)
endif()
# Creates the library target cusparse_linops_<type> from utils/cuda_linops.cu.
#
# \param type  suffix appended to the target name (cusparse_linops_${type})
# \param def   preprocessor macro the library is compiled with; it is attached
#              as a PUBLIC definition, so every target linking the library
#              receives it as well
function(ginkgo_benchmark_cusparse_linops type def)
    set(linops_target cusparse_linops_${type})
    add_library(${linops_target} utils/cuda_linops.cu)
    # ${def} is PUBLIC on purpose (it propagates to consumers, which helps to
    # catch issues); ALLOWMP=1 is only needed while compiling the library itself
    target_compile_definitions(${linops_target}
        PUBLIC ${def}
        PRIVATE ALLOWMP=1)
    target_include_directories(${linops_target}
        SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS})
    target_link_libraries(${linops_target}
        Ginkgo::ginkgo
        ${CUDA_RUNTIME_LIBS}
        ${CUBLAS}
        ${CUSPARSE})
endfunction()

function(ginkgo_benchmark_hipsp_linops name)
target_compile_definitions("${name}" PRIVATE HAS_HIP=1)
function(ginkgo_benchmark_hipsparse_linops type def)
add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp)
target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def})
EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS)
set_target_properties("${name}" PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS})
set_target_properties(hipsparse_linops_${type} PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS})
# use Thrust C++ device just for compilation, we don't use thrust::complex in the benchmarks
target_compile_definitions("${name}" PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP)
target_compile_definitions(hipsparse_linops_${type} PUBLIC -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP)
# for some reason, HIP creates a dependency on Threads::Threads here, so we
# need to find it
find_package(Threads REQUIRED)
Expand All @@ -43,11 +34,10 @@ function(ginkgo_benchmark_hipsp_linops name)
find_package(hiprand REQUIRED)
find_package(hipsparse REQUIRED)
find_package(rocrand REQUIRED)
target_include_directories("${name}" SYSTEM PRIVATE
target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE
${HSA_HEADER} ${HIP_INCLUDE_DIRS}
${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS})

target_link_libraries("${name}" ${HIPSPARSE_LIBRARIES})
target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES})
endfunction()


Expand All @@ -61,18 +51,26 @@ endfunction()
# \param macro_def preprocessor macro name that will be defined during
# building (to compile for a specific type)
# All remaining arguments will be treated as source files
function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def)
function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def type)
add_executable("${name}" ${ARGN})
target_link_libraries("${name}" ginkgo gflags rapidjson)
if (GINKGO_BUILD_CUDA)
target_link_libraries("${name}" cuda_timer)
endif()
if (GINKGO_BUILD_HIP)
target_link_libraries("${name}" hip_timer)
endif()
Comment on lines +57 to +62
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this change, the benchmarks no longer see the HipTimer and CudaTimer declarations, right?
They can only be used through Timer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, the actual implementations are hidden, since they need CUDA/HIP-specific types as members.

target_compile_definitions("${name}" PRIVATE "${macro_def}")
target_compile_options("${name}" PRIVATE ${GINKGO_COMPILER_FLAGS})
ginkgo_benchmark_add_tuning_maybe("${name}")
if("${use_lib_linops}")
if (GINKGO_BUILD_CUDA)
ginkgo_benchmark_cusp_linops("${name}")
target_compile_definitions("${name}" PRIVATE HAS_CUDA=1)
target_link_libraries("${name}" cusparse_linops_${type})
endif()
if (GINKGO_BUILD_HIP)
ginkgo_benchmark_hipsp_linops("${name}")
target_compile_definitions("${name}" PRIVATE HAS_HIP=1)
target_link_libraries("${name}" hipsparse_linops_${type})
endif()
endif()
endfunction(ginkgo_add_single_benchmark_executable)
Expand All @@ -87,16 +85,43 @@ endfunction(ginkgo_add_single_benchmark_executable)
# All remaining arguments will be treated as source files
function(ginkgo_add_typed_benchmark_executables name use_lib_linops)
ginkgo_add_single_benchmark_executable(
"${name}" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_PRECISION" ${ARGN})
"${name}" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_PRECISION" "d" ${ARGN})
ginkgo_add_single_benchmark_executable(
"${name}_single" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_PRECISION" ${ARGN})
"${name}_single" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_PRECISION" "s" ${ARGN})
ginkgo_add_single_benchmark_executable(
"${name}_dcomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION" ${ARGN})
"${name}_dcomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION" "z" ${ARGN})
ginkgo_add_single_benchmark_executable(
"${name}_scomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION" ${ARGN})
"${name}_scomplex" "${use_lib_linops}" "GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION" "c" ${ARGN})
Comment on lines -90 to +94
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use the same suffix for the executables and the libraries?
Either d/s/z/c or single/double/... for both.

endfunction(ginkgo_add_typed_benchmark_executables)


# CUDA-side helper libraries for the benchmarks, only defined when
# GINKGO_BUILD_CUDA is set: four calls to ginkgo_benchmark_cusparse_linops (one
# per suffix d/s/z/c, each paired with its GKO_BENCHMARK_USE_* precision macro)
# and the cuda_timer library.
if (GINKGO_BUILD_CUDA)
# the libraries below are built from .cu sources, so enable CUDA before
# defining them
enable_language(CUDA)
ginkgo_benchmark_cusparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
ginkgo_benchmark_cusparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION)
ginkgo_benchmark_cusparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)
ginkgo_benchmark_cusparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)
# cuda_timer: built from utils/cuda_timer.cu, linked against ginkgo and the
# CUDA runtime libraries; CUDA headers are added as SYSTEM PRIVATE includes
add_library(cuda_timer utils/cuda_timer.cu)
target_link_libraries(cuda_timer ginkgo ${CUDA_RUNTIME_LIBS})
target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS})
endif()
# HIP-side helper libraries for the benchmarks, only defined when
# GINKGO_BUILD_HIP is set: four calls to ginkgo_benchmark_hipsparse_linops (one
# per suffix d/s/z/c, each paired with its GKO_BENCHMARK_USE_* precision macro)
# and the hip_timer library.
if (GINKGO_BUILD_HIP)
    ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION)
    ginkgo_benchmark_hipsparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION)
    ginkgo_benchmark_hipsparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION)
    ginkgo_benchmark_hipsparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION)

    add_library(hip_timer utils/hip_timer.hip.cpp)
    # hipconfig reports the preprocessor flags used to compile the HIP source.
    # Trailing whitespace is stripped so the value can be stored verbatim in
    # COMPILE_FLAGS.
    execute_process(
        COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config
        OUTPUT_VARIABLE HIP_CXX_FLAGS
        OUTPUT_STRIP_TRAILING_WHITESPACE)
    # An empty result (hipconfig missing, not executable, or silent) would
    # otherwise surface as an opaque "incorrect number of arguments" error from
    # set_target_properties; report the actual cause instead.
    if ("${HIP_CXX_FLAGS}" STREQUAL "")
        message(FATAL_ERROR
            "`${HIP_PATH}/bin/hipconfig --cpp_config` produced no output. "
            "Check that HIP_PATH points to a HIP installation.")
    endif()
    set_target_properties(hip_timer PROPERTIES COMPILE_FLAGS "${HIP_CXX_FLAGS}")
    # for some reason, HIP creates a dependency on Threads::Threads here, so we
    # need to find it
    find_package(Threads REQUIRED)
    find_package(HIP REQUIRED)
    target_include_directories(hip_timer SYSTEM PRIVATE ${HSA_HEADER} ${HIP_INCLUDE_DIRS})
    target_link_libraries(hip_timer ginkgo)
endif()


add_subdirectory(blas)
add_subdirectory(conversions)
add_subdirectory(matrix_generator)
Expand Down
Loading