From d53f32aa84dc3c03c5d50615c10fd6b466d367ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabien=20P=C3=A9an?=
Date: Wed, 2 Aug 2023 22:46:24 +0200
Subject: [PATCH] Fix icb_parpack_[c|cpp] to split the problem across MPI
 processes

---
 CHANGES                               |  3 ++
 PARPACK/TESTS/MPI/icb_parpack_c.c     | 48 ++++++++++++++++---------
 PARPACK/TESTS/MPI/icb_parpack_cpp.cpp | 51 ++++++++++++++++++---------
 3 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/CHANGES b/CHANGES
index a1a3a661d..7e37f57fb 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,8 @@
 arpack-ng - 3.9.1
 
+[ Fabien Péan ]
+ * [BUG FIX] Tests for PARPACK with C/C++ bindings icb_parpack_c and icb_parpack_cpp are now really parallel and split the problem across MPI processes.
+
 [ Szabolcs Horvát ]
  * [BUG FIX] Ensure that LAPACK RNG state is propagated (regression in 3.9.0).
  * [BUG FIX] Ensure that separate random seeds are used on different parallel thread in D and S versions of functions (issue from original ARPACK).
diff --git a/PARPACK/TESTS/MPI/icb_parpack_c.c b/PARPACK/TESTS/MPI/icb_parpack_c.c
index b35fcb900..e19065efe 100644
--- a/PARPACK/TESTS/MPI/icb_parpack_c.c
+++ b/PARPACK/TESTS/MPI/icb_parpack_c.c
@@ -22,9 +22,9 @@
  * with entries 1000, 999, ... , 2, 1 on the diagonal.
  *
  */
-void dMatVec(const double* x, double* y) {
+void dMatVec(int start, int N, const double* x, double* y) {
   int i;
-  for (i = 0; i < 1000; ++i) y[i] = ((double)(i + 1)) * x[i];
+  for (i = 0; i < N; ++i) y[i] = ((double)(start + i + 1)) * x[i];
 };
 
 int ds() {
@@ -57,16 +57,24 @@ int ds() {
   char which[] = "LM";
   char howmny[] = "A";
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  printf("rank: %d, size: %d\n", rank, N_local);
+  int start = rank*(N / nprocs) + ((rank < N % nprocs) ? rank : N % nprocs);
+  printf("rank: %d, start: %d\n", rank, start);
+  //////////////////////////////////////////////////////////
   a_int info = 0, ido = 0;
   do {
-    pdsaupd_c(MCW, &ido, bmat, N, which, nev, tol, resid, ncv, V, ldv, iparam,
+    pdsaupd_c(MCW, &ido, bmat, N_local, which, nev, tol, resid, ncv, V, ldv, iparam,
               ipntr, workd, workl, lworkl, &info);
 
-    dMatVec(&(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
+    dMatVec(start, N_local, &(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
   } while (ido == 1 || ido == -1);
 
   // check info and number of ev found by arpack.
@@ -75,7 +83,7 @@ int ds() {
     return 1;
   }
 
-  pdseupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, bmat, N, which, nev,
+  pdseupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, bmat, N_local, which, nev,
             tol, resid, ncv, V, ldv, iparam, ipntr, workd, workl, lworkl, &info);
   if (info < 0) {
     printf("Error in seupd: info %d\n", info);
@@ -94,9 +102,9 @@ int ds() {
   return 0;
 }
 
-void zMatVec(const a_dcomplex* x, a_dcomplex* y) {
+void zMatVec(int start, int N, const a_dcomplex* x, a_dcomplex* y) {
   int i;
-  for (i = 0; i < 1000; ++i) y[i] = x[i] * CMPLXF(i + 1.0, i + 1.0);
+  for (i = 0; i < N; ++i) y[i] = x[i] * CMPLXF(start + i + 1.0, start + i + 1.0);
 };
 
 int zn() {
@@ -131,16 +139,24 @@ int zn() {
   char which[] = "LM";
   char howmny[] = "A";
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  printf("rank: %d, size: %d\n", rank, N_local);
+  int start = rank*(N / nprocs) + ((rank < N % nprocs) ? rank : N % nprocs);
+  printf("rank: %d, start: %d\n", rank, start);
+  //////////////////////////////////////////////////////////
   a_int info = 0, ido = 0;
   do {
-    pznaupd_c(MCW, &ido, bmat, N, which, nev, tol, resid, ncv, V, ldv, iparam,
+    pznaupd_c(MCW, &ido, bmat, N_local, which, nev, tol, resid, ncv, V, ldv, iparam,
               ipntr, workd, workl, lworkl, rwork, &info);
 
-    zMatVec(&(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
+    zMatVec(start, N_local, &(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
   } while (ido == 1 || ido == -1);
 
   // check info and number of ev found by arpack.
@@ -149,7 +165,7 @@ int zn() {
     return 1;
   }
 
-  pzneupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, workev, bmat, N, which,
+  pzneupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, workev, bmat, N_local, which,
             nev, tol, resid, ncv, V, ldv, iparam, ipntr, workd, workl, lworkl,
             rwork, &info);
   if (info < 0) {
diff --git a/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp b/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
index f55487783..58bf12ee1 100644
--- a/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
+++ b/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
@@ -20,9 +20,9 @@
 #include "stat_c.hpp"  // arpack statistics.
 
 template <typename Real>
-void diagonal_matrix_vector_product(const Real* x, Real* y) {
-  for (int i = 0; i < 1000; ++i) {
-    y[i] = static_cast<Real>(i + 1) * x[i];
+void diagonal_matrix_vector_product(int start, int nloc, const Real* x, Real* y) {
+  for (int i = 0; i < nloc; ++i) {
+    y[i] = static_cast<Real>(start + i + 1) * x[i];
   }
 }
 
@@ -53,18 +53,27 @@ void real_symmetric_runner(double const& tol_check, arpack::which const& ritz_op
   iparam[3] = 1;  // NB, only 1 allowed
   iparam[6] = 1;  // mode
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  std::cout<< "rank: " << rank << ", size :" << N_local << "\n";
+  int start = rank*(N / nprocs) + std::min(rank, N % nprocs);
+  std::cout<< "rank: " << rank << ", start: " << start << "\n";
-void diagonal_matrix_vector_product(const std::complex<Real>* x, std::complex<Real>* y) {
-  for (int i = 0; i < 1000; ++i) {
     // Use complex matrix (i, -i) instead of (i, i): this way "largest_magnitude"
     // and "largest_imaginary" options produce different results that can be checked.
-    y[i] = x[i] * std::complex<Real>{Real(i + 1), -Real(i + 1)};
+    y[i] = x[i] * std::complex<Real>{Real(start + i + 1), -Real(start + i + 1)};
   }
 }
 
@@ -131,18 +140,26 @@ void complex_nonsymmetric_runner(double const& tol_check, arpack::which const& r
   iparam[3] = 1;  // NB, only 1 allowed
   iparam[6] = 1;  // mode
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  std::cout<< "rank: " << rank << ", size :" << N_local << "\n";
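
Note (not part of the patch): the owner-block arithmetic that the patch adds to both tests can be sanity-checked without MPI. The sketch below is a minimal standalone C program using made-up values for N and nprocs; it applies the same N_local/start formulas as the patch and verifies that the per-rank blocks tile [0, N) with no gaps or overlaps.

/* Standalone sketch of the block partition used in the patch.
 * N and nprocs are hypothetical values chosen only for illustration. */
#include <stdio.h>

int main(void) {
  const int N = 1000;   /* hypothetical global problem size */
  const int nprocs = 7; /* hypothetical number of MPI processes */
  int covered = 0;      /* rows accounted for so far */

  for (int rank = 0; rank < nprocs; ++rank) {
    /* each rank gets N/nprocs rows; the first N%nprocs ranks get one extra */
    int N_local = N / nprocs + (rank < N % nprocs ? 1 : 0);
    /* global index of the first row owned by this rank */
    int start = rank * (N / nprocs) + (rank < N % nprocs ? rank : N % nprocs);
    printf("rank %d: start %d, size %d\n", rank, start, N_local);
    /* blocks must be contiguous: this rank starts where the previous one ended */
    if (start != covered) return 1;
    covered += N_local;
  }
  /* all N rows must be covered exactly once */
  return covered == N ? 0 : 1;
}

With N = 1000 and nprocs = 7 the program prints seven blocks of size 143 or 142 and exits with status 0, which is the invariant the per-process dMatVec/zMatVec calls in the patched tests rely on.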