ginkgo-project · upsj · Nov 5, 2022 · Nov 4, 2022
diff --git a/benchmark/utils/cuda_linops.cu b/benchmark/utils/cuda_linops.cu
@@ -349,6 +349,9 @@ private:
 #endif  // CUDA_VERSION < 11000
 
 
+#if CUDA_VERSION < 11021
+
+
 template <typename ValueType = gko::default_precision,
           typename IndexType = gko::int32>
 class CusparseCsrEx
@@ -448,6 +451,9 @@ private:
 };
 
 
+#endif  // CUDA_VERSION < 11021
+
+
 #if CUDA_VERSION < 11000
 
 
@@ -784,8 +790,12 @@ private:
 }  // namespace detail
 
 
+#if CUDA_VERSION < 11021
 IMPL_CREATE_SPARSELIB_LINOP(cusparse_csrex,
                             detail::CusparseCsrEx<etype, itype>);
+#else  // CUDA_VERSION >= 11021
+STUB_CREATE_SPARSELIB_LINOP(cusparse_csrex);
+#endif  // CUDA_VERSION < 11021
 
 #if CUDA_VERSION < 11000
 IMPL_CREATE_SPARSELIB_LINOP(cusparse_csr, detail::CusparseCsr<etype, itype>);
@@ -805,9 +815,13 @@ STUB_CREATE_SPARSELIB_LINOP(cusparse_csrmm);
     ((CUDA_VERSION >= 10020) && !(defined(_WIN32) || defined(__CYGWIN__)))
 IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcsr,
                             detail::CusparseGenericCsr<etype, itype>);
-IMPL_CREATE_SPARSELIB_LINOP(
-    cusparse_gcsr2,
-    detail::CusparseGenericCsr<etype, itype, CUSPARSE_CSRMV_ALG2>);
+#if CUDA_VERSION >= 11021
+constexpr auto csr_algo = CUSPARSE_SPMV_CSR_ALG2;
+#else  // CUDA_VERSION < 11021
+constexpr auto csr_algo = CUSPARSE_CSRMV_ALG2;
+#endif  // CUDA_VERSION >= 11021
+IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcsr2,
+                            detail::CusparseGenericCsr<etype, itype, csr_algo>);
 IMPL_CREATE_SPARSELIB_LINOP(cusparse_gcoo,
                             detail::CusparseGenericCoo<etype, itype>);
 #else

diff --git a/common/cuda_hip/matrix/dense_kernels.hpp.inc b/common/cuda_hip/matrix/dense_kernels.hpp.inc
@@ -47,7 +47,6 @@ __global__
         return;
     }
 
-    const auto bs_sq = block_size * block_size;
     const auto num_cols = num_block_cols * block_size;
     auto warp =
         group::tiled_partition<config::warp_size>(group::this_thread_block());
@@ -385,15 +384,14 @@ template <typename ValueType, typename IndexType>
 __global__ __launch_bounds__(default_block_size) void fill_in_sellp(
     size_type num_rows, size_type num_cols, size_type slice_size,
     size_type stride, const ValueType* __restrict__ source,
-    size_type* __restrict__ slice_lengths, size_type* __restrict__ slice_sets,
-    IndexType* __restrict__ col_idxs, ValueType* __restrict__ values)
+    size_type* __restrict__ slice_sets, IndexType* __restrict__ col_idxs,
+    ValueType* __restrict__ values)
 {
     const auto row = thread::get_subwarp_id_flat<config::warp_size>();
     const auto local_row = row % slice_size;
     const auto slice = row / slice_size;
 
     if (row < num_rows) {
-        const auto slice_length = slice_lengths[slice];
         auto warp = group::tiled_partition<config::warp_size>(
             group::this_thread_block());
         const auto lane = warp.thread_rank();

diff --git a/cuda/base/cusparse_bindings.hpp b/cuda/base/cusparse_bindings.hpp
@@ -199,6 +199,7 @@ inline void spmm(cusparseHandle_t handle, cusparseOperation_t opA,
 
 #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
 
+
 #define GKO_BIND_CUSPARSE32_SPMV(ValueType, CusparseName)                    \
     inline void spmv_mp(cusparseHandle_t handle, cusparseOperation_t transA, \
                         int32 m, int32 n, int32 nnz, const ValueType* alpha, \
@@ -296,6 +297,9 @@ GKO_BIND_CUSPARSE64_SPMM(ValueType, detail::not_implemented);
 #endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
 
 
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11021)
+
+
 template <typename ValueType, typename IndexType>
 inline void spmv(cusparseHandle_t handle, cusparseAlgMode_t alg,
                  cusparseOperation_t transA, IndexType m, IndexType n,
@@ -380,6 +384,9 @@ GKO_BIND_CUSPARSE_SPMV_BUFFERSIZE(std::complex<double>);
 #undef GKO_BIND_CUSPARSE_SPMV_BUFFERSIZE
 
 
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11021)
+
+
 #if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
 
 
@@ -408,12 +415,6 @@ GKO_BIND_CUSPARSE32_SPMV(ValueType, detail::not_implemented);
 #undef GKO_BIND_CUSPARSE32_SPMV
 
 
-#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
-
-
-#if defined(CUDA_VERSION) && (CUDA_VERSION < 11000)
-
-
 template <typename ValueType, typename IndexType>
 void spgemm_buffer_size(
     cusparseHandle_t handle, IndexType m, IndexType n, IndexType k,
@@ -947,6 +948,9 @@ inline void destroy(cusparseSpSMDescr_t info)
 #endif  // defined(CUDA_VERSION) && (CUDA_VERSION >= 11000)
 
 
+#if defined(CUDA_VERSION) && (CUDA_VERSION < 11031)
+
+
 inline csrsm2Info_t create_solve_info()
 {
     csrsm2Info_t info{};
@@ -961,6 +965,9 @@ inline void destroy(csrsm2Info_t info)
 }
 
 
+#endif  // defined(CUDA_VERSION) && (CUDA_VERSION < 11031)
+
+
 inline csrilu02Info_t create_ilu0_info()
 {
     csrilu02Info_t info{};
@@ -989,6 +996,9 @@ inline void destroy(csric02Info_t info)
 }
 
 
+#if (defined(CUDA_VERSION) && (CUDA_VERSION < 11031))
+
+
 #define GKO_BIND_CUSPARSE32_BUFFERSIZEEXT(ValueType, CusparseName)            \
     inline void buffer_size_ext(                                              \
         cusparseHandle_t handle, int algo, cusparseOperation_t trans1,        \
@@ -1144,7 +1154,7 @@ GKO_BIND_CUSPARSE64_CSRSM2_SOLVE(ValueType, detail::not_implemented);
 #undef GKO_BIND_CUSPARSE64_CSRSM2_SOLVE
 
 
-#if (defined(CUDA_VERSION) && (CUDA_VERSION >= 11031))
+#else  // !(defined(CUDA_VERSION) && (CUDA_VERSION < 11031))
 
 
 template <typename ValueType>

diff --git a/cuda/matrix/coo_kernels.cu b/cuda/matrix/coo_kernels.cu
@@ -68,7 +68,6 @@ namespace cuda {
 namespace coo {
 
 
-constexpr int default_block_size = 512;
 constexpr int warps_in_block = 4;
 constexpr int spmv_block_size = warps_in_block * config::warp_size;
 

diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.cu
@@ -363,7 +363,11 @@ bool try_general_sparselib_spmv(std::shared_ptr<const CudaExecutor> exec,
     if (b->get_stride() == 1 && c->get_stride() == 1) {
         auto vecb = cusparse::create_dnvec(b->get_size()[0], b_val);
         auto vecc = cusparse::create_dnvec(c->get_size()[0], c_val);
-        cusparseSpMVAlg_t alg = CUSPARSE_CSRMV_ALG1;
+#if CUDA_VERSION >= 11021
+        constexpr auto alg = CUSPARSE_SPMV_CSR_ALG1;
+#else
+        constexpr auto alg = CUSPARSE_CSRMV_ALG1;
+#endif
         size_type buffer_size = 0;
         cusparse::spmv_buffersize<ValueType>(handle, trans, alpha, mat, vecb,
                                              beta, vecc, alg, &buffer_size);

diff --git a/cuda/matrix/dense_kernels.cu b/cuda/matrix/dense_kernels.cu
@@ -373,16 +373,14 @@ void convert_to_sellp(std::shared_ptr<const DefaultExecutor> exec,
 
     auto vals = result->get_values();
     auto col_idxs = result->get_col_idxs();
-    auto slice_lengths = result->get_slice_lengths();
     auto slice_sets = result->get_slice_sets();
     const auto slice_size = result->get_slice_size();
 
     auto grid_dim = ceildiv(num_rows, default_block_size / config::warp_size);
     if (grid_dim > 0) {
         kernel::fill_in_sellp<<<grid_dim, default_block_size>>>(
             num_rows, num_cols, slice_size, stride,
-            as_cuda_type(source->get_const_values()),
-            as_cuda_type(slice_lengths), as_cuda_type(slice_sets),
+            as_cuda_type(source->get_const_values()), as_cuda_type(slice_sets),
             as_cuda_type(col_idxs), as_cuda_type(vals));
     }
 }

diff --git a/hip/matrix/coo_kernels.hip.cpp b/hip/matrix/coo_kernels.hip.cpp
@@ -71,7 +71,6 @@ namespace hip {
 namespace coo {
 
 
-constexpr int default_block_size = 512;
 constexpr int warps_in_block = 4;
 constexpr int spmv_block_size = warps_in_block * config::warp_size;
 

diff --git a/hip/matrix/dense_kernels.hip.cpp b/hip/matrix/dense_kernels.hip.cpp
@@ -379,7 +379,6 @@ void convert_to_sellp(std::shared_ptr<const DefaultExecutor> exec,
 
     auto vals = result->get_values();
     auto col_idxs = result->get_col_idxs();
-    auto slice_lengths = result->get_slice_lengths();
     auto slice_sets = result->get_slice_sets();
 
     const auto slice_size = result->get_slice_size();
@@ -390,8 +389,8 @@ void convert_to_sellp(std::shared_ptr<const DefaultExecutor> exec,
         hipLaunchKernelGGL(kernel::fill_in_sellp, grid_dim, default_block_size,
                            0, 0, num_rows, num_cols, slice_size, stride,
                            as_hip_type(source->get_const_values()),
-                           as_hip_type(slice_lengths), as_hip_type(slice_sets),
-                           as_hip_type(col_idxs), as_hip_type(vals));
+                           as_hip_type(slice_sets), as_hip_type(col_idxs),
+                           as_hip_type(vals));
     }
 }