From 6d4a017fbf728cec329c60150afc752da58d3313 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel
Date: Wed, 3 Nov 2021 21:26:16 +0100
Subject: [PATCH] review updates

Co-authored-by: Yuhsiang Tsai
Co-authored-by: Terry Cojean
---
 BENCHMARKING.md                      |   4 +-
 benchmark/CMakeLists.txt             |  24 +--
 benchmark/utils/cuda_linops.cu       | 311 +++++++++++++--------------
 benchmark/utils/formats.hpp          | 127 ++++++-----
 benchmark/utils/hip_linops.hip.cpp   | 128 +++++------
 benchmark/utils/sparselib_linops.hpp |  57 +++--
 benchmark/utils/timer.hpp            |   8 +-
 7 files changed, 334 insertions(+), 325 deletions(-)

diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index bc419f57386..f62540264cd 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -287,12 +287,12 @@ The supported environment variables are described in the following list:
 * `PRECONDS={jacobi,ic,ilu,paric,parict,parilu,parilut,ic-isai,ilu-isai,paric-isai,parict-isai,parilu-isai,parilut-isai,none}`
   the preconditioners to use for either `solver` or `preconditioner` benchmarks.
   Multiple options can be passed to this variable. Default is `none`.
-* `FORMATS={csr,coo,ell,hybrid,sellp,hybridxx,cusp_xx,hipsp_xx}` the matrix
+* `FORMATS={csr,coo,ell,hybrid,sellp,hybridxx,cusparse_xx,hipsparse_xx}` the matrix
   formats to benchmark for the `spmv` phase of the benchmark. Run
   `${ginkgo_build_dir}/benchmark/spmv/spmv --help` for a full list. If needed,
   multiple options for hybrid with different optimization parameters are
   available. Depending on the libraries available at build time, vendor
-  library formats (cuSPARSE with `cusp_` prefix or hipSPARSE with `hipsp_`
+  library formats (cuSPARSE with `cusparse_` prefix or hipSPARSE with `hipsparse_`
   prefix) can be used as well. Multiple options can be passed. The default is
   `csr,coo,ell,hybrid,sellp`.
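As an illustration (not part of the patch, and assuming the `run_all_benchmarks.sh` driver and the `EXECUTOR` variable documented elsewhere in BENCHMARKING.md), a run restricted to the renamed vendor formats could be launched as

    EXECUTOR=cuda FORMATS="csr,cusparse_csr,cusparse_gcoo" ./run_all_benchmarks.sh

with the corresponding `hipsparse_*` values taking the place of `cusparse_*` on AMD hardware.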
* `SOLVERS={bicgstab,bicg,cg,cgs,fcg,gmres,cb_gmres_{keep,reduce1,reduce2,integer,ireduce1,ireduce2},lower_trs,upper_trs}` diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 80d9838a29a..af8d38e3eb3 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -10,18 +10,16 @@ function(ginkgo_benchmark_add_tuning_maybe name) endif() endfunction() -function(ginkgo_benchmark_cusp_linops type def) +function(ginkgo_benchmark_cusparse_linops type def) add_library(cusparse_linops_${type} utils/cuda_linops.cu) # make the dependency public to catch issues target_compile_definitions(cusparse_linops_${type} PUBLIC ${def}) target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo ${CUDA_RUNTIME_LIBS} ${CUBLAS} ${CUSPARSE}) target_include_directories(cusparse_linops_${type} SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) - if(CMAKE_CUDA_COMPILER_VERSION GREATER_EQUAL "9.2") - target_compile_definitions(cusparse_linops_${type} PRIVATE ALLOWMP=1) - endif() + target_compile_definitions(cusparse_linops_${type} PRIVATE ALLOWMP=1) endfunction() -function(ginkgo_benchmark_hipsp_linops type def) +function(ginkgo_benchmark_hipsparse_linops type def) add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp) target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def}) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) @@ -99,19 +97,19 @@ endfunction(ginkgo_add_typed_benchmark_executables) if (GINKGO_BUILD_CUDA) enable_language(CUDA) - ginkgo_benchmark_cusp_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) - ginkgo_benchmark_cusp_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) - ginkgo_benchmark_cusp_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) - ginkgo_benchmark_cusp_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) + ginkgo_benchmark_cusparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) + ginkgo_benchmark_cusparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) + ginkgo_benchmark_cusparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) + ginkgo_benchmark_cusparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(cuda_timer utils/cuda_timer.cu) target_link_libraries(cuda_timer ginkgo ${CUDA_RUNTIME_LIBS}) target_include_directories(cuda_timer SYSTEM PRIVATE ${CUDA_INCLUDE_DIRS}) endif() if (GINKGO_BUILD_HIP) - ginkgo_benchmark_hipsp_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) - ginkgo_benchmark_hipsp_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) - ginkgo_benchmark_hipsp_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) - ginkgo_benchmark_hipsp_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) + ginkgo_benchmark_hipsparse_linops(d GKO_BENCHMARK_USE_DOUBLE_PRECISION) + ginkgo_benchmark_hipsparse_linops(s GKO_BENCHMARK_USE_SINGLE_PRECISION) + ginkgo_benchmark_hipsparse_linops(z GKO_BENCHMARK_USE_DOUBLE_COMPLEX_PRECISION) + ginkgo_benchmark_hipsparse_linops(c GKO_BENCHMARK_USE_SINGLE_COMPLEX_PRECISION) add_library(hip_timer utils/hip_timer.hip.cpp) EXECUTE_PROCESS(COMMAND ${HIP_PATH}/bin/hipconfig --cpp_config OUTPUT_VARIABLE HIP_CXX_FLAGS) set_target_properties(hip_timer PROPERTIES COMPILE_FLAGS ${HIP_CXX_FLAGS}) diff --git a/benchmark/utils/cuda_linops.cu b/benchmark/utils/cuda_linops.cu index 1977f0e382e..5e2cf680183 100644 --- a/benchmark/utils/cuda_linops.cu +++ b/benchmark/utils/cuda_linops.cu @@ -49,27 +49,27 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "cuda/base/types.hpp" -class cusp_csr {}; -class cusp_csrmp {}; -class cusp_csrmm {}; -class cusp_hybrid {}; -class cusp_coo {}; -class cusp_ell {}; -class cusp_gcsr {}; -class cusp_gcoo {}; -class cusp_csrex {}; -class cusp_gcsr2 {}; +class cusparse_csr {}; +class cusparse_csrmp {}; +class cusparse_csrmm {}; +class cusparse_hybrid {}; +class cusparse_coo {}; +class cusparse_ell {}; +class cusparse_gcsr {}; +class cusparse_gcoo {}; +class cusparse_csrex {}; +class cusparse_gcsr2 {}; namespace detail { -class CuspBase : public gko::LinOp { +class CusparseBase : public gko::LinOp { public: cusparseMatDescr_t get_descr() const { return this->descr_.get(); } - // Return shared pointer not plain pointer such that CuspGenericSpMV uses - // gko::Array to allocate buffer. + // Return shared pointer not plain pointer such that CusparseGenericSpMV + // uses gko::Array to allocate buffer. std::shared_ptr get_gpu_exec() const { return gpu_exec_; @@ -82,8 +82,8 @@ protected: GKO_NOT_IMPLEMENTED; } - CuspBase(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) + CusparseBase(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) : gko::LinOp(exec, size) { gpu_exec_ = std::dynamic_pointer_cast(exec); @@ -93,11 +93,11 @@ protected: this->initialize_descr(); } - ~CuspBase() = default; + ~CusparseBase() = default; - CuspBase(const CuspBase& other) = delete; + CusparseBase(const CusparseBase& other) = delete; - CuspBase& operator=(const CuspBase& other) + CusparseBase& operator=(const CusparseBase& other) { if (this != &other) { gko::LinOp::operator=(other); @@ -127,17 +127,18 @@ private: }; -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +#if CUDA_VERSION < 11000 template -class CuspCsrmp - : public gko::EnableLinOp, CuspBase>, +class CusparseCsrmp + : public gko::EnableLinOp, + CusparseBase>, public gko::ReadableFromMatrixData, - public gko::EnableCreateMethod> { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + public gko::EnableCreateMethod> { + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -173,9 +174,9 @@ protected: &scalars.get_const_data()[1], dx); } - CuspCsrmp(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseCsrmp(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) @@ -192,12 +193,12 @@ private: template -class CuspCsr - : public gko::EnableLinOp, CuspBase>, - public gko::EnableCreateMethod>, +class CusparseCsr + : public gko::EnableLinOp, CusparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -233,9 +234,9 @@ protected: &scalars.get_const_data()[1], dx); } - CuspCsr(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseCsr(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) @@ -252,12 +253,13 @@ private: template -class CuspCsrmm - : public gko::EnableLinOp, CuspBase>, - public gko::EnableCreateMethod>, +class 
CusparseCsrmm + : public gko::EnableLinOp, + CusparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -294,9 +296,9 @@ protected: dense_x->get_size()[0]); } - CuspCsrmm(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseCsrmm(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) @@ -311,17 +313,18 @@ private: }; -#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +#endif // CUDA_VERSION < 11000 template -class CuspCsrEx - : public gko::EnableLinOp, CuspBase>, - public gko::EnableCreateMethod>, +class CusparseCsrEx + : public gko::EnableLinOp, + CusparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -338,9 +341,9 @@ public: return csr_->get_num_stored_elements(); } - CuspCsrEx(const CuspCsrEx& other) = delete; + CusparseCsrEx(const CusparseCsrEx& other) = delete; - CuspCsrEx& operator=(const CuspCsrEx& other) = default; + CusparseCsrEx& operator=(const CusparseCsrEx& other) = default; protected: void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override @@ -378,9 +381,9 @@ protected: } - CuspCsrEx(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseCsrEx(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE), @@ -399,21 +402,22 @@ private: }; -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +#if CUDA_VERSION < 11000 template -class CuspHybrid +class CusparseHybrid : public gko::EnableLinOp< - CuspHybrid, CuspBase>, + CusparseHybrid, + CusparseBase>, public gko::EnableCreateMethod< - CuspHybrid>, + CusparseHybrid>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -435,21 +439,21 @@ public: Threshold, Partition); } - ~CuspHybrid() override + ~CusparseHybrid() override { const auto id = this->get_gpu_exec()->get_device_id(); try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyHybMat(hyb_)); } catch (const std::exception& e) { - std::cerr << "Error when unallocating CuspHybrid hyb_ matrix: " + std::cerr << "Error when unallocating CusparseHybrid hyb_ matrix: " << e.what() << std::endl; } } - CuspHybrid(const CuspHybrid& other) = delete; + CusparseHybrid(const CusparseHybrid& other) = delete; - CuspHybrid& operator=(const CuspHybrid& other) = default; + CusparseHybrid& operator=(const CusparseHybrid& other) = default; protected: void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override @@ -467,9 +471,9 @@ protected: &scalars.get_const_data()[1], dx); } - CuspHybrid(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + 
CusparseHybrid(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) { const auto id = this->get_gpu_exec()->get_device_id(); @@ -486,20 +490,19 @@ private: }; -#endif // defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +#endif // CUDA_VERSION < 11000 -#if defined(CUDA_VERSION) && \ - (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) +#if CUDA_VERSION >= 11000 || \ + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__))) template -void cusp_generic_spmv(std::shared_ptr gpu_exec, - const cusparseSpMatDescr_t mat, - const gko::Array& scalars, - const gko::LinOp* b, gko::LinOp* x, - cusparseOperation_t trans, cusparseSpMVAlg_t alg) +void cusparse_generic_spmv(std::shared_ptr gpu_exec, + const cusparseSpMatDescr_t mat, + const gko::Array& scalars, + const gko::LinOp* b, gko::LinOp* x, + cusparseOperation_t trans, cusparseSpMVAlg_t alg) { cudaDataType_t cu_value = gko::kernels::cuda::cuda_data_type(); using gko::kernels::cuda::as_culibs_type; @@ -536,13 +539,14 @@ void cusp_generic_spmv(std::shared_ptr gpu_exec, template -class CuspGenericCsr - : public gko::EnableLinOp, - CuspBase>, - public gko::EnableCreateMethod>, +class CusparseGenericCsr + : public gko::EnableLinOp, + CusparseBase>, + public gko::EnableCreateMethod< + CusparseGenericCsr>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -570,32 +574,33 @@ public: return csr_->get_num_stored_elements(); } - ~CuspGenericCsr() override + ~CusparseGenericCsr() override { const auto id = this->get_gpu_exec()->get_device_id(); try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_)); } catch (const std::exception& e) { - std::cerr << "Error when unallocating CuspGenericCsr mat_ matrix: " - << e.what() << std::endl; + std::cerr + << "Error when unallocating CusparseGenericCsr mat_ matrix: " + << e.what() << std::endl; } } - CuspGenericCsr(const CuspGenericCsr& other) = delete; + CusparseGenericCsr(const CusparseGenericCsr& other) = delete; - CuspGenericCsr& operator=(const CuspGenericCsr& other) = default; + CusparseGenericCsr& operator=(const CusparseGenericCsr& other) = default; protected: void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { - cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, - Alg); + cusparse_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, + Alg); } - CuspGenericCsr(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseGenericCsr(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) @@ -613,12 +618,13 @@ private: template -class CuspGenericCoo - : public gko::EnableLinOp, CuspBase>, - public gko::EnableCreateMethod>, +class CusparseGenericCoo + : public gko::EnableLinOp, + CusparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using coo = gko::matrix::Coo; @@ -646,32 +652,33 @@ public: return 
coo_->get_num_stored_elements(); } - ~CuspGenericCoo() override + ~CusparseGenericCoo() override { const auto id = this->get_gpu_exec()->get_device_id(); try { gko::cuda::device_guard g{id}; GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroySpMat(mat_)); } catch (const std::exception& e) { - std::cerr << "Error when unallocating CuspGenericCoo mat_ matrix: " - << e.what() << std::endl; + std::cerr + << "Error when unallocating CusparseGenericCoo mat_ matrix: " + << e.what() << std::endl; } } - CuspGenericCoo(const CuspGenericCoo& other) = delete; + CusparseGenericCoo(const CusparseGenericCoo& other) = delete; - CuspGenericCoo& operator=(const CuspGenericCoo& other) = default; + CusparseGenericCoo& operator=(const CusparseGenericCoo& other) = default; protected: void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override { - cusp_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, - CUSPARSE_MV_ALG_DEFAULT); + cusparse_generic_spmv(this->get_gpu_exec(), mat_, scalars, b, x, trans_, + CUSPARSE_MV_ALG_DEFAULT); } - CuspGenericCoo(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + CusparseGenericCoo(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), coo_(std::move(coo::create(exec))), trans_(CUSPARSE_OPERATION_NON_TRANSPOSE) {} @@ -686,66 +693,58 @@ private: }; -#endif // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || ((CUDA_VERSION >= - // 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) +#endif // CUDA_VERSION >= 11000 || ((CUDA_VERSION >= 10020) && + // !(defined(_WIN32) || defined(__CYGWIN__))) } // namespace detail -// Some shortcuts +IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrex, detail::CusparseCsrEx) +#if CUDA_VERSION < 11000 +IMPL_CREATE_SPARSELIB_LINOP(cusparse_csr, detail::CusparseCsr); +IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrmp, + detail::CusparseCsrmp); +IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrmm, + detail::CusparseCsrmm); +#else // CUDA_VERSION >= 11000 +IMPL_CREATE_SPARSELIB_LINOP(cusparse_csr, + detail::CusparseGenericCsr); +STUB_CREATE_SPARSELIB_LINOP(cusparse_csrmp); +STUB_CREATE_SPARSELIB_LINOP(cusparse_csrmm); +#endif // CUDA_VERSION >= 11000 -#define IMPL_CREATE_SPARSELIB_LINOP(_type, ...) 
\ - template <> \ - std::unique_ptr create_sparselib_linop<_type>( \ - std::shared_ptr exec) \ - { \ - return __VA_ARGS__::create(exec); \ - } -#define STUB_CREATE_SPARSELIB_LINOP(_type) \ - template <> \ - std::unique_ptr create_sparselib_linop<_type>( \ - std::shared_ptr exec) GKO_NOT_IMPLEMENTED; - -IMPL_CREATE_SPARSELIB_LINOP(cusp_csrex, detail::CuspCsrEx) - -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) -IMPL_CREATE_SPARSELIB_LINOP(cusp_csr, detail::CuspCsr) -IMPL_CREATE_SPARSELIB_LINOP(cusp_csrmp, detail::CuspCsrmp) -IMPL_CREATE_SPARSELIB_LINOP(cusp_csrmm, detail::CuspCsrmm) -#else -STUB_CREATE_SPARSELIB_LINOP(cusp_csr) -STUB_CREATE_SPARSELIB_LINOP(cusp_csrmp) -STUB_CREATE_SPARSELIB_LINOP(cusp_csrmm) -#endif // not (defined(CUDA_VERSION) && (CUDA_VERSION < 11000)) - - -#if defined(CUDA_VERSION) && \ - (CUDA_VERSION >= 11000 || \ - ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))) -IMPL_CREATE_SPARSELIB_LINOP(cusp_gcsr, detail::CuspGenericCsr) +#if CUDA_VERSION >= 11000 || \ + ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__))) +IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcsr, + detail::CusparseGenericCsr); IMPL_CREATE_SPARSELIB_LINOP( - cusp_gcsr2, detail::CuspGenericCsr) -IMPL_CREATE_SPARSELIB_LINOP(cusp_gcoo, detail::CuspGenericCoo) + cusparse_gcsr2, + detail::CusparseGenericCsr); +IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcoo, + detail::CusparseGenericCoo); #else -STUB_CREATE_SPARSELIB_LINOP(cusp_gcsr) -STUB_CREATE_SPARSELIB_LINOP(cusp_gcsr2) -STUB_CREATE_SPARSELIB_LINOP(cusp_gcoo) -#endif // not (defined(CUDA_VERSION) && (CUDA_VERSION >= 11000 || - // ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || - // defined(__CYGWIN__))))) +STUB_CREATE_SPARSELIB_LINOP(cusparse_gcsr); +STUB_CREATE_SPARSELIB_LINOP(cusparse_gcsr2); +STUB_CREATE_SPARSELIB_LINOP(cusparse_gcoo); +#endif // CUDA_VERSION < 11000 && ((CUDA_VERSION < 10020) || (defined(_WIN32) + // && defined(__CYGWIN__)))) -#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000) +#if CUDA_VERSION < 11000 IMPL_CREATE_SPARSELIB_LINOP( - cusp_coo, detail::CuspHybrid) + cusparse_coo, + detail::CusparseHybrid); IMPL_CREATE_SPARSELIB_LINOP( - cusp_ell, detail::CuspHybrid) -IMPL_CREATE_SPARSELIB_LINOP(cusp_hybrid, detail::CuspHybrid) -#else -STUB_CREATE_SPARSELIB_LINOP(cusp_coo) -STUB_CREATE_SPARSELIB_LINOP(cusp_ell) -STUB_CREATE_SPARSELIB_LINOP(cusp_hybrid) -#endif // not (defined(CUDA_VERSION) && (CUDA_VERSION < 11000)) + cusparse_ell, + detail::CusparseHybrid); +IMPL_CREATE_SPARSELIB_LINOP(cusparse_hybrid, + detail::CusparseHybrid); +#else // CUDA_VERSION >= 11000 +IMPL_CREATE_SPARSELIB_LINOP(cusparse_coo, + detail::CusparseGenericCoo); +STUB_CREATE_SPARSELIB_LINOP(cusparse_ell); +STUB_CREATE_SPARSELIB_LINOP(cusparse_hybrid); +#endif // CUDA_VERSION >= 11000 diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index 55aa003b361..b1a4a3583f8 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -58,64 +58,73 @@ std::string available_format = "hybrid60, hybrid80, hybridlimit0, hybridlimit25, hybridlimit33, " "hybridminstorage" #ifdef HAS_CUDA - ", cusp_csr, cusp_csrex, cusp_coo" - ", cusp_csrmp, cusp_csrmm, cusp_ell, cusp_hybrid" - ", cusp_gcsr, cusp_gcsr2, cusp_gcoo" + ", cusparse_csr, cusparse_csrex, cusparse_coo" + ", cusparse_csrmp, cusparse_csrmm, cusparse_ell, cusparse_hybrid" + ", cusparse_gcsr, cusparse_gcsr2, cusparse_gcoo" #endif // HAS_CUDA #ifdef HAS_HIP - ", hipsp_csr, hipsp_csrmm, hipsp_coo, hipsp_ell, hipsp_hybrid" + ", hipsparse_csr, hipsparse_csrmm, 
hipsparse_coo, hipsparse_ell, " + "hipsparse_hybrid" #endif // HAS_HIP ".\n"; std::string format_description = - "coo: Coordinate storage. The CUDA kernel uses the load-balancing approach " - "suggested in Flegar et al.: Overcoming Load Imbalance for Irregular " - "Sparse Matrices.\n" - "csr: Compressed Sparse Row storage. Ginkgo implementation with automatic " - "strategy.\n" + "coo: Coordinate storage. The GPU kernels use the load-balancing " + "approach\n" + " suggested in Flegar et al.: Overcoming Load Imbalance for\n" + " Irregular Sparse Matrices.\n" + "csr: Compressed Sparse Row storage. Ginkgo implementation with\n" + " automatic strategy.\n" "csrc: Ginkgo's CSR implementation with automatic stategy.\n" "csri: Ginkgo's CSR implementation with inbalance strategy.\n" "csrm: Ginkgo's CSR implementation with merge_path strategy.\n" "csrs: Ginkgo's CSR implementation with sparselib strategy.\n" - "ell: Ellpack format according to Bell and Garland: Efficient Sparse " - "Matrix-Vector Multiplication on CUDA.\n" - "ell-mixed: Mixed Precision Ellpack format according to Bell and Garland: " - "Efficient Sparse Matrix-Vector Multiplication on CUDA.\n" + "ell: Ellpack format according to Bell and Garland: Efficient Sparse\n" + " Matrix-Vector Multiplication on CUDA.\n" + "ell-mixed: Mixed Precision Ellpack format according to Bell and Garland:\n" + " Efficient Sparse Matrix-Vector Multiplication on CUDA.\n" "sellp: Sliced Ellpack uses a default block size of 32.\n" - "hybrid: Hybrid uses ell and coo to represent the matrix.\n" - "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80: Hybrid uses " - "the row distribution to decide the partition.\n" - "hybridlimit0, hybridlimit25, hybrid33: Add the upper bound on the ell " - "part of hybrid0, hybrid25, hybrid33.\n" - "hybridminstorage: Hybrid uses the minimal storage to store the matrix." + "hybrid: Hybrid uses ELL and COO to represent the matrix.\n" + "hybrid0, hybrid25, hybrid33, hybrid40, hybrid60, hybrid80:\n" + " Use 0%, 25%, ... quantiles of the row length distribution\n" + " to choose number of entries stored in the ELL part.\n" + "hybridlimit0, hybridlimit25, hybrid33: Similar to hybrid0\n" + " but with an additional absolute limit on the number of entries\n" + " per row stored in ELL.\n" + "hybridminstorage: Use the minimal storage to store the matrix." #ifdef HAS_CUDA "\n" - "cusp_coo: use cusparseXhybmv with a CUSPARSE_HYB_PARTITION_USER " - "partition.\n" - "cusp_csr: benchmark CuSPARSE with the cusparseXcsrmv function.\n" - "cusp_ell: use cusparseXhybmv with CUSPARSE_HYB_PARTITION_MAX partition.\n" - "cusp_csrmp: benchmark CuSPARSE with the cusparseXcsrmv_mp function.\n" - "cusp_csrmm: benchmark CuSPARSE with the cusparseXcsrmv_mm function.\n" - "cusp_hybrid: benchmark CuSPARSE spmv with cusparseXhybmv and an automatic " - "partition.\n" - "cusp_csrex: benchmark CuSPARSE with the cusparseXcsrmvEx function." 
- "\n" - "cusp_gcsr: benchmark CuSPARSE with the generic csr with default " - "algorithm.\n" - "cusp_gcsr2: benchmark CuSPARSE with the generic csr with " - "CUSPARSE_CSRMV_ALG2.\n" - "cusp_gcoo: benchmark CuSPARSE with the generic coo with default " - "algorithm.\n" + "cusparse_coo: cuSPARSE COO SpMV, using cusparseXhybmv with \n" + " CUSPARSE_HYB_PARTITION_USER for CUDA < 10.2, or\n" + " the Generic API otherwise\n" + "cusparse_csr: cuSPARSE CSR SpMV, using cusparseXcsrmv for CUDA < 10.2,\n" + " or the Generic API with default algorithm otherwise\n" + "cusparse_csrex: cuSPARSE CSR SpMV using cusparseXcsrmvEx\n" + "cusparse_ell: cuSPARSE ELL SpMV using cusparseXhybmv with\n" + " CUSPARSE_HYB_PARTITION_MAX, available for CUDA < 11.0\n" + "cusparse_csrmp: cuSPARSE CSR SpMV using cusparseXcsrmv_mp,\n" + " available for CUDA < 11.0\n" + "cusparse_csrmm: cuSPARSE CSR SpMV using cusparseXcsrmv_mm,\n" + " available for CUDA < 11.0\n" + "cusparse_hybrid: cuSPARSE Hybrid SpMV using cusparseXhybmv\n" + " with an automatic partition, available for CUDA < 11.0\n" + "cusparse_gcsr: cuSPARSE CSR SpMV using Generic API with default\n" + " algorithm, available for CUDA >= 10.2\n" + "cusparse_gcsr2: cuSPARSE CSR SpMV using Generic API with\n" + " CUSPARSE_CSRMV_ALG2, available for CUDA >= 10.2\n" + "cusparse_gcoo: cuSPARSE Generic API with default COO SpMV,\n" + " available for CUDA >= 10.2\n" #endif // HAS_CUDA #ifdef HAS_HIP "\n" - "hipsp_csr: benchmark HipSPARSE with the hipsparseXcsrmv function.\n" - "hipsp_csrmm: benchmark HipSPARSE with the hipsparseXcsrmv_mm function.\n" - "hipsp_hybrid: benchmark HipSPARSE spmv with hipsparseXhybmv and an " - "automatic partition.\n" - "hipsp_coo: use hipsparseXhybmv with a HIPSPARSE_HYB_PARTITION_USER " - "partition.\n" - "hipsp_ell: use hipsparseXhybmv with HIPSPARSE_HYB_PARTITION_MAX partition." 
+ "hipsparse_csr: hipSPARSE CSR SpMV using hipsparseXcsrmv\n" + "hipsparse_csrmm: hipSPARSE CSR SpMV using hipsparseXcsrmv_mm\n" + "hipsparse_hybrid: hipSPARSE CSR SpMV using hipsparseXhybmv\n" + " with an automatic partition\n" + "hipsparse_coo: hipSPARSE CSR SpMV using hipsparseXhybmv\n" + " with HIPSPARSE_HYB_PARTITION_USER\n" + "hipsparse_ell: hipSPARSE CSR SpMV using hipsparseXhybmv\n" + " with HIPSPARSE_HYB_PARTITION_MAX" #endif // HAS_HIP ; @@ -299,25 +308,25 @@ const std::map( return mat; }}, #ifdef HAS_CUDA - {"cusp_csr", read_splib_matrix_from_data}, - {"cusp_csrmp", read_splib_matrix_from_data}, - {"cusp_csrmm", read_splib_matrix_from_data}, - {"cusp_hybrid", read_splib_matrix_from_data}, - {"cusp_coo", read_splib_matrix_from_data}, - {"cusp_ell", read_splib_matrix_from_data}, - {"cusp_csr", read_splib_matrix_from_data}, - {"cusp_coo", read_splib_matrix_from_data}, - {"cusp_csrex", read_splib_matrix_from_data}, - {"cusp_gcsr", read_splib_matrix_from_data}, - {"cusp_gcsr2", read_splib_matrix_from_data}, - {"cusp_gcoo", read_splib_matrix_from_data}, + {"cusparse_csr", read_splib_matrix_from_data}, + {"cusparse_csrmp", read_splib_matrix_from_data}, + {"cusparse_csrmm", read_splib_matrix_from_data}, + {"cusparse_hybrid", read_splib_matrix_from_data}, + {"cusparse_coo", read_splib_matrix_from_data}, + {"cusparse_ell", read_splib_matrix_from_data}, + {"cusparse_csr", read_splib_matrix_from_data}, + {"cusparse_coo", read_splib_matrix_from_data}, + {"cusparse_csrex", read_splib_matrix_from_data}, + {"cusparse_gcsr", read_splib_matrix_from_data}, + {"cusparse_gcsr2", read_splib_matrix_from_data}, + {"cusparse_gcoo", read_splib_matrix_from_data}, #endif // HAS_CUDA #ifdef HAS_HIP - {"hipsp_csr", read_splib_matrix_from_data}, - {"hipsp_csrmm", read_splib_matrix_from_data}, - {"hipsp_hybrid", read_splib_matrix_from_data}, - {"hipsp_coo", read_splib_matrix_from_data}, - {"hipsp_ell", read_splib_matrix_from_data}, + {"hipsparse_csr", read_splib_matrix_from_data}, + {"hipsparse_csrmm", read_splib_matrix_from_data}, + {"hipsparse_hybrid", read_splib_matrix_from_data}, + {"hipsparse_coo", read_splib_matrix_from_data}, + {"hipsparse_ell", read_splib_matrix_from_data}, #endif // HAS_HIP {"hybrid", read_matrix_from_data}, {"hybrid0", diff --git a/benchmark/utils/hip_linops.hip.cpp b/benchmark/utils/hip_linops.hip.cpp index 707b383e9ec..2b4418a2dcf 100644 --- a/benchmark/utils/hip_linops.hip.cpp +++ b/benchmark/utils/hip_linops.hip.cpp @@ -45,11 +45,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "hip/base/hipsparse_bindings.hip.hpp" -class hipsp_csr {}; -class hipsp_csrmm {}; -class hipsp_hybrid {}; -class hipsp_coo {}; -class hipsp_ell {}; +class hipsparse_csr {}; +class hipsparse_csrmm {}; +class hipsparse_hybrid {}; +class hipsparse_coo {}; +class hipsparse_ell {}; namespace detail { @@ -58,7 +58,7 @@ namespace detail { struct hipsparseMatDescr; -class HipspBase : public gko::LinOp { +class HipsparseBase : public gko::LinOp { public: hipsparseMatDescr_t get_descr() const { return this->descr_.get(); } @@ -71,8 +71,8 @@ class HipspBase : public gko::LinOp { GKO_NOT_IMPLEMENTED; } - HipspBase(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) + HipsparseBase(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) : gko::LinOp(exec, size) { gpu_exec_ = std::dynamic_pointer_cast(exec); @@ -82,11 +82,11 @@ class HipspBase : public gko::LinOp { this->initialize_descr(); } - ~HipspBase() = default; + ~HipsparseBase() = default; - HipspBase(const HipspBase& other) = delete; + HipsparseBase(const HipsparseBase& other) = delete; - HipspBase& operator=(const HipspBase& other) + HipsparseBase& operator=(const HipsparseBase& other) { if (this != &other) { gko::LinOp::operator=(other); @@ -119,12 +119,13 @@ class HipspBase : public gko::LinOp { template -class HipspCsr - : public gko::EnableLinOp, HipspBase>, - public gko::EnableCreateMethod>, +class HipsparseCsr + : public gko::EnableLinOp, + HipsparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -160,9 +161,9 @@ class HipspCsr &scalars.get_const_data()[1], dx); } - HipspCsr(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + HipsparseCsr(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE) @@ -179,12 +180,13 @@ class HipspCsr template -class HipspCsrmm - : public gko::EnableLinOp, HipspBase>, - public gko::EnableCreateMethod>, +class HipsparseCsrmm + : public gko::EnableLinOp, + HipsparseBase>, + public gko::EnableCreateMethod>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -221,9 +223,9 @@ class HipspCsrmm dense_x->get_size()[0]); } - HipspCsrmm(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + HipsparseCsrmm(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), csr_(std::move( csr::create(exec, std::make_shared()))), trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE) @@ -242,14 +244,15 @@ template -class HipspHybrid +class HipsparseHybrid : public gko::EnableLinOp< - HipspHybrid, HipspBase>, + HipsparseHybrid, + HipsparseBase>, public gko::EnableCreateMethod< - HipspHybrid>, + HipsparseHybrid>, public gko::ReadableFromMatrixData { - friend class gko::EnableCreateMethod; - friend class gko::EnablePolymorphicObject; + friend class gko::EnableCreateMethod; + friend class gko::EnablePolymorphicObject; public: using csr = gko::matrix::Csr; @@ -271,21 +274,21 @@ class HipspHybrid Threshold, 
Partition); } - ~HipspHybrid() override + ~HipsparseHybrid() override { const auto id = this->get_gpu_exec()->get_device_id(); try { gko::hip::device_guard g{id}; GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseDestroyHybMat(hyb_)); } catch (const std::exception& e) { - std::cerr << "Error when unallocating HipspHybrid hyb_ matrix: " + std::cerr << "Error when unallocating HipsparseHybrid hyb_ matrix: " << e.what() << std::endl; } } - HipspHybrid(const HipspHybrid& other) = delete; + HipsparseHybrid(const HipsparseHybrid& other) = delete; - HipspHybrid& operator=(const HipspHybrid& other) = default; + HipsparseHybrid& operator=(const HipsparseHybrid& other) = default; protected: void apply_impl(const gko::LinOp* b, gko::LinOp* x) const override @@ -303,9 +306,9 @@ class HipspHybrid &scalars.get_const_data()[1], dx); } - HipspHybrid(std::shared_ptr exec, - const gko::dim<2>& size = gko::dim<2>{}) - : gko::EnableLinOp(exec, size), + HipsparseHybrid(std::shared_ptr exec, + const gko::dim<2>& size = gko::dim<2>{}) + : gko::EnableLinOp(exec, size), trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE) { const auto id = this->get_gpu_exec()->get_device_id(); @@ -325,39 +328,14 @@ class HipspHybrid } // namespace detail -template <> -std::unique_ptr create_sparselib_linop( - std::shared_ptr exec) -{ - return detail::HipspCsr::create(exec); -} - -template <> -std::unique_ptr create_sparselib_linop( - std::shared_ptr exec) -{ - return detail::HipspCsrmm::create(exec); -} - -template <> -std::unique_ptr create_sparselib_linop( - std::shared_ptr exec) -{ - return detail::HipspHybrid::create(exec); -} - -template <> -std::unique_ptr create_sparselib_linop( - std::shared_ptr exec) -{ - return detail::HipspHybrid::create(exec); -} - -template <> -std::unique_ptr create_sparselib_linop( - std::shared_ptr exec) -{ - return detail::HipspHybrid::create(exec); -} +IMPL_CREATE_SPARSELIB_LINOP(hipsparse_csr, detail::HipsparseCsr); +IMPL_CREATE_SPARSELIB_LINOP(hipsparse_csrmm, + detail::HipsparseCsrmm); +IMPL_CREATE_SPARSELIB_LINOP( + hipsparse_coo, + detail::HipsparseHybrid); +IMPL_CREATE_SPARSELIB_LINOP( + hipsparse_ell, + detail::HipsparseHybrid); +IMPL_CREATE_SPARSELIB_LINOP(hipsparse_hybrid, + detail::HipsparseHybrid); diff --git a/benchmark/utils/sparselib_linops.hpp b/benchmark/utils/sparselib_linops.hpp index a953fb5b4f2..7949a92d2b5 100644 --- a/benchmark/utils/sparselib_linops.hpp +++ b/benchmark/utils/sparselib_linops.hpp @@ -33,32 +33,51 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef GKO_BENCHMARK_UTILS_SPARSELIB_LINOPS_HPP_ #define GKO_BENCHMARK_UTILS_SPARSELIB_LINOPS_HPP_ + #include +#include #include -#include "ginkgo/core/base/exception_helpers.hpp" -class cusp_csr; -class cusp_csrmp; -class cusp_csrmm; -class cusp_hybrid; -class cusp_coo; -class cusp_ell; -class cusp_gcsr; -class cusp_gcoo; -class cusp_csrex; -class cusp_gcsr; -class cusp_gcsr2; -class cusp_gcoo; +#define IMPL_CREATE_SPARSELIB_LINOP(_type, ...) 
\ + template <> \ + std::unique_ptr create_sparselib_linop<_type>( \ + std::shared_ptr exec) \ + { \ + return __VA_ARGS__::create(exec); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ + "semi-colon warnings") + + +#define STUB_CREATE_SPARSELIB_LINOP(_type) \ + template <> \ + std::unique_ptr create_sparselib_linop<_type>( \ + std::shared_ptr exec) GKO_NOT_IMPLEMENTED + + +class cusparse_csr; +class cusparse_csrmp; +class cusparse_csrmm; +class cusparse_hybrid; +class cusparse_coo; +class cusparse_ell; +class cusparse_gcsr; +class cusparse_gcoo; +class cusparse_csrex; +class cusparse_gcsr; +class cusparse_gcsr2; +class cusparse_gcoo; -class hipsp_csr; -class hipsp_csrmm; -class hipsp_hybrid; -class hipsp_coo; -class hipsp_ell; +class hipsparse_csr; +class hipsparse_csrmm; +class hipsparse_hybrid; +class hipsparse_coo; +class hipsparse_ell; template @@ -66,4 +85,4 @@ std::unique_ptr create_sparselib_linop( std::shared_ptr exec); -#endif // GKO_BENCHMARK_UTILS_SPARSELIB_LINOPS_HPP_ \ No newline at end of file +#endif // GKO_BENCHMARK_UTILS_SPARSELIB_LINOPS_HPP_ diff --git a/benchmark/utils/timer.hpp b/benchmark/utils/timer.hpp index 548bc91a510..31406b4d3de 100644 --- a/benchmark/utils/timer.hpp +++ b/benchmark/utils/timer.hpp @@ -34,12 +34,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_BENCHMARK_UTILS_TIMER_HPP_ -#include "benchmark/utils/timer_impl.hpp" +#include + + +#include #include +#include "benchmark/utils/timer_impl.hpp" + + // Command-line arguments DEFINE_bool(gpu_timer, false, "use gpu timer based on event. It is valid only when "
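To make the wrapper pattern behind these renames easier to follow, here is a minimal, illustrative sketch (not taken from the patch) of how benchmark code obtains one of the vendor SpMV operators through `create_sparselib_linop`. The template arguments are not visible in this excerpt, so the sketch assumes the executor parameter is `std::shared_ptr<const gko::Executor>`, and it assumes linking against the per-precision `cusparse_linops_*` libraries built in benchmark/CMakeLists.txt.

// Illustrative usage sketch, assuming the declaration in
// benchmark/utils/sparselib_linops.hpp reads roughly:
//   template <typename OpTag>
//   std::unique_ptr<gko::LinOp> create_sparselib_linop(
//       std::shared_ptr<const gko::Executor> exec);
#include <memory>
#include <utility>

#include <ginkgo/ginkgo.hpp>

#include "benchmark/utils/sparselib_linops.hpp"

std::unique_ptr<gko::LinOp> make_cusparse_csr_spmv(
    std::shared_ptr<const gko::Executor> exec)
{
    // The empty tag class cusparse_csr selects the specialization generated
    // by IMPL_CREATE_SPARSELIB_LINOP in cuda_linops.cu; tags whose backend is
    // unavailable for the current CUDA version are generated by
    // STUB_CREATE_SPARSELIB_LINOP instead and throw gko::NotImplemented.
    return create_sparselib_linop<cusparse_csr>(std::move(exec));
}

The returned `gko::LinOp` is then filled with matrix data and benchmarked through the usual `apply()` interface, which mirrors what the `read_splib_matrix_from_data` entries in formats.hpp do.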