From d53f32aa84dc3c03c5d50615c10fd6b466d367ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabien=20P=C3=A9an?=
Date: Wed, 2 Aug 2023 22:46:24 +0200
Subject: [PATCH] Fix icb_parpack_[c|cpp] to split the problem across MPI
 processes

---
 CHANGES                               |  3 ++
 PARPACK/TESTS/MPI/icb_parpack_c.c     | 48 ++++++++++++++++---------
 PARPACK/TESTS/MPI/icb_parpack_cpp.cpp | 51 ++++++++++++++++++---------
 3 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/CHANGES b/CHANGES
index a1a3a661d..7e37f57fb 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,5 +1,8 @@
 arpack-ng - 3.9.1
 
+[ Fabien Péan ]
+ * [BUG FIX] Tests for PARPACK with C/C++ bindings icb_parpack_c and icb_parpack_cpp are now really parallel and split the problem across MPI processes.
+
 [ Szabolcs Horvát ]
  * [BUG FIX] Ensure that LAPACK RNG state is propagated (regression in 3.9.0).
  * [BUG FIX] Ensure that separate random seeds are used on different parallel thread in D and S versions of functions (issue from original ARPACK).
diff --git a/PARPACK/TESTS/MPI/icb_parpack_c.c b/PARPACK/TESTS/MPI/icb_parpack_c.c
index b35fcb900..e19065efe 100644
--- a/PARPACK/TESTS/MPI/icb_parpack_c.c
+++ b/PARPACK/TESTS/MPI/icb_parpack_c.c
@@ -22,9 +22,9 @@
  * with entries 1000, 999, ... , 2, 1 on the diagonal.
  *
  */
-void dMatVec(const double* x, double* y) {
+void dMatVec(int start, int N, const double* x, double* y) {
   int i;
-  for (i = 0; i < 1000; ++i) y[i] = ((double)(i + 1)) * x[i];
+  for (i = 0; i < N; ++i) y[i] = ((double)(start + i + 1)) * x[i];
 };
 
 int ds() {
@@ -57,16 +57,24 @@ int ds() {
   char which[] = "LM";
   char howmny[] = "A";
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  printf("rank: %d, size: %d\n", rank, N_local);
+  int start = rank*(N / nprocs) + ((rank < N % nprocs) ? rank : N % nprocs);
+  printf("rank: %d, start: %d\n", rank, start);
+  //////////////////////////////////////////////////////////
   a_int info = 0, ido = 0;
   do {
-    pdsaupd_c(MCW, &ido, bmat, N, which, nev, tol, resid, ncv, V, ldv, iparam,
+    pdsaupd_c(MCW, &ido, bmat, N_local, which, nev, tol, resid, ncv, V, ldv, iparam,
               ipntr, workd, workl, lworkl, &info);
 
-    dMatVec(&(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
+    dMatVec(start, N_local, &(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
   } while (ido == 1 || ido == -1);
 
   // check info and number of ev found by arpack.
@@ -75,7 +83,7 @@ int ds() {
     return 1;
   }
 
-  pdseupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, bmat, N, which, nev,
+  pdseupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, bmat, N_local, which, nev,
             tol, resid, ncv, V, ldv, iparam, ipntr, workd, workl, lworkl, &info);
   if (info < 0) {
     printf("Error in seupd: info %d\n", info);
@@ -94,9 +102,9 @@ int ds() {
   return 0;
 }
 
-void zMatVec(const a_dcomplex* x, a_dcomplex* y) {
+void zMatVec(int start, int N, const a_dcomplex* x, a_dcomplex* y) {
   int i;
-  for (i = 0; i < 1000; ++i) y[i] = x[i] * CMPLXF(i + 1.0, i + 1.0);
+  for (i = 0; i < N; ++i) y[i] = x[i] * CMPLXF(start + i + 1.0, start + i + 1.0);
 };
 
 int zn() {
@@ -131,16 +139,24 @@ int zn() {
   char which[] = "LM";
   char howmny[] = "A";
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  printf("rank: %d, size: %d\n", rank, N_local);
+  int start = rank*(N / nprocs) + ((rank < N % nprocs) ? rank : N % nprocs);
+  printf("rank: %d, start: %d\n", rank, start);
+  //////////////////////////////////////////////////////////
   a_int info = 0, ido = 0;
   do {
-    pznaupd_c(MCW, &ido, bmat, N, which, nev, tol, resid, ncv, V, ldv, iparam,
+    pznaupd_c(MCW, &ido, bmat, N_local, which, nev, tol, resid, ncv, V, ldv, iparam,
               ipntr, workd, workl, lworkl, rwork, &info);
 
-    zMatVec(&(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
+    zMatVec(start, N_local, &(workd[ipntr[0] - 1]), &(workd[ipntr[1] - 1]));
   } while (ido == 1 || ido == -1);
 
   // check info and number of ev found by arpack.
@@ -149,7 +165,7 @@ int zn() {
     return 1;
   }
 
-  pzneupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, workev, bmat, N, which,
+  pzneupd_c(MCW, rvec, howmny, select, d, z, ldz, sigma, workev, bmat, N_local, which,
             nev, tol, resid, ncv, V, ldv, iparam, ipntr, workd, workl, lworkl,
             rwork, &info);
   if (info < 0) {
diff --git a/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp b/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
index f55487783..58bf12ee1 100644
--- a/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
+++ b/PARPACK/TESTS/MPI/icb_parpack_cpp.cpp
@@ -20,9 +20,9 @@
 #include "stat_c.hpp"  // arpack statistics.
 
 template <typename Real>
-void diagonal_matrix_vector_product(const Real* x, Real* y) {
-  for (int i = 0; i < 1000; ++i) {
-    y[i] = static_cast<Real>(i + 1) * x[i];
+void diagonal_matrix_vector_product(int start, int nloc, const Real* x, Real* y) {
+  for (int i = 0; i < nloc; ++i) {
+    y[i] = static_cast<Real>(start + i + 1) * x[i];
   }
 }
 
@@ -53,18 +53,27 @@ void real_symmetric_runner(double const& tol_check, arpack::which const& ritz_op
   iparam[3] = 1;  // NB, only 1 allowed
   iparam[6] = 1;  // mode
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  std::cout<< "rank: " << rank << ", size :" << N_local << "\n";
+  int start = rank*(N / nprocs) + std::min(rank, N % nprocs);
+  std::cout<< "rank: " << rank << ", start: " << start << "\n";
-void diagonal_matrix_vector_product(const std::complex<Real>* x, std::complex<Real>* y) {
-  for (int i = 0; i < 1000; ++i) {
     // Use complex matrix (i, -i) instead of (i, i): this way "largest_magnitude"
     // and "largest_imaginary" options produce different results that can be checked.
-    y[i] = x[i] * std::complex<Real>{Real(i + 1), -Real(i + 1)};
+    y[i] = x[i] * std::complex<Real>{Real(start + i + 1), -Real(start + i + 1)};
   }
 }
 
@@ -131,18 +140,26 @@ void complex_nonsymmetric_runner(double const& tol_check, arpack::which const& r
   iparam[3] = 1;  // NB, only 1 allowed
   iparam[6] = 1;  // mode
 
-  int rank;
-  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   MPI_Fint MCW = MPI_Comm_c2f(MPI_COMM_WORLD);
-
+  /// Split problem across each process/////////////////////
+  int rank, nprocs;
+  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
+  a_int N_local = N / nprocs;
+  if (rank < N % nprocs) // spread the remaining on each process
+    N_local = N_local + 1;
+  std::cout<< "rank: " << rank << ", size :" << N_local << "\n";
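
Note (not part of the patch): the owner-block arithmetic that the patch adds to both tests can be sanity-checked without MPI. The sketch below is a minimal standalone C program using made-up values for N and nprocs; it applies the same N_local/start formulas as the patch and verifies that the per-rank blocks tile [0, N) with no gaps or overlaps.

/* Standalone sketch of the block partition used in the patch.
 * N and nprocs are hypothetical values chosen only for illustration. */
#include <stdio.h>

int main(void) {
  const int N = 1000;   /* hypothetical global problem size */
  const int nprocs = 7; /* hypothetical number of MPI processes */
  int covered = 0;      /* rows accounted for so far */

  for (int rank = 0; rank < nprocs; ++rank) {
    /* each rank gets N/nprocs rows; the first N%nprocs ranks get one extra */
    int N_local = N / nprocs + (rank < N % nprocs ? 1 : 0);
    /* global index of the first row owned by this rank */
    int start = rank * (N / nprocs) + (rank < N % nprocs ? rank : N % nprocs);
    printf("rank %d: start %d, size %d\n", rank, start, N_local);
    /* blocks must be contiguous: this rank starts where the previous one ended */
    if (start != covered) return 1;
    covered += N_local;
  }
  /* all N rows must be covered exactly once */
  return covered == N ? 0 : 1;
}

With N = 1000 and nprocs = 7 the program prints seven blocks of size 143 or 142 and exits with status 0, which is the invariant the per-process dMatVec/zMatVec calls in the patched tests rely on.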