From d2a76abcdc5f8b8a1a687a52f350e0255d1bd817 Mon Sep 17 00:00:00 2001 From: Ivan Raikov Date: Tue, 31 Jan 2023 10:31:32 -0800 Subject: [PATCH] Implementation of subworlds in CoreNEURON (#2185) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Alexandru Săvulescu Co-authored-by: nrnhines --- .pre-commit-config.yaml | 2 +- src/coreneuron/mpi/lib/nrnmpi.cpp | 57 ++++++++++++- src/coreneuron/utils/utils.cpp | 6 ++ .../callbacks/nrncore_callbacks.cpp | 14 ++++ .../callbacks/nrncore_callbacks.h | 9 +++ src/nrnmpi/nrnmpi.cpp | 20 +++-- src/nrnmpi/nrnmpi_def_cinc | 4 + src/oc/nrnmpi.h | 15 ++-- src/parallel/bbs.cpp | 7 +- test/CMakeLists.txt | 28 +++++++ test/coreneuron/test_subworlds.py | 79 +++++++++++++++++++ test/parallel_tests/test_subworld.py | 5 +- 12 files changed, 230 insertions(+), 16 deletions(-) create mode 100644 test/coreneuron/test_subworlds.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9faf716bb4..84fbc72e50 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 22.1.0 + rev: 22.12.0 hooks: - id: black language_version: python3 diff --git a/src/coreneuron/mpi/lib/nrnmpi.cpp b/src/coreneuron/mpi/lib/nrnmpi.cpp index bc84a969e5..eccb50a26a 100644 --- a/src/coreneuron/mpi/lib/nrnmpi.cpp +++ b/src/coreneuron/mpi/lib/nrnmpi.cpp @@ -20,6 +20,7 @@ #include namespace coreneuron { + MPI_Comm nrnmpi_world_comm; MPI_Comm nrnmpi_comm; int nrnmpi_numprocs_; @@ -34,6 +35,8 @@ static void nrn_fatal_error(const char* msg) { nrnmpi_abort_impl(-1); } +void corenrn_subworld(); + nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) { // Execute at most once per launch. Avoid memory leak. static bool executed = false; @@ -54,10 +57,13 @@ nrnmpi_init_ret_t nrnmpi_init_impl(int* pargc, char*** pargv, bool is_quiet) { nrn_assert(MPI_Init(pargc, pargv) == MPI_SUCCESS); #endif } + nrn_assert(MPI_Comm_dup(MPI_COMM_WORLD, &nrnmpi_world_comm) == MPI_SUCCESS); nrn_assert(MPI_Comm_dup(nrnmpi_world_comm, &nrnmpi_comm) == MPI_SUCCESS); - nrn_assert(MPI_Comm_rank(nrnmpi_world_comm, &nrnmpi_myid_) == MPI_SUCCESS); - nrn_assert(MPI_Comm_size(nrnmpi_world_comm, &nrnmpi_numprocs_) == MPI_SUCCESS); + corenrn_subworld(); // split nrnmpi_comm if ParallelContext.subworlds has been used + nrn_assert(MPI_Comm_rank(nrnmpi_comm, &nrnmpi_myid_) == MPI_SUCCESS); + nrn_assert(MPI_Comm_size(nrnmpi_comm, &nrnmpi_numprocs_) == MPI_SUCCESS); + nrnmpi_spike_initialize(); if (nrnmpi_myid_ == 0 && !is_quiet) { @@ -82,6 +88,53 @@ void nrnmpi_finalize_impl(void) { } } +extern "C" { +extern void (*nrn2core_subworld_info_)(int&, int&, int&, int&, int&); +} + +void corenrn_subworld() { + // If ParallelContext.subworlds has been invoked, split the world + // communicator according to the subworld partitioning. + static int change_cnt{0}; + int nrn_subworld_change_cnt, nrn_subworld_index, nrn_subworld_rank, nrn_mpi_numprocs_subworld, + nrn_mpi_numprocs_world; + if (!nrn2core_subworld_info_) { + return; + } + (*nrn2core_subworld_info_)(nrn_subworld_change_cnt, + nrn_subworld_index, + nrn_subworld_rank, + nrn_mpi_numprocs_subworld, + nrn_mpi_numprocs_world); + if (nrn_subworld_change_cnt == change_cnt) { + return; + } + change_cnt = nrn_subworld_change_cnt; + + // clean up / free old nrn_mpi_comm + nrn_assert(MPI_Comm_free(&nrnmpi_comm) == MPI_SUCCESS); + + // ensure world size is the same as NEURON + int world_size{-1}; + nrn_assert(MPI_Comm_size(nrnmpi_world_comm, &world_size) == MPI_SUCCESS); + nrn_assert(world_size == nrn_mpi_numprocs_world); + + // create a new nrnmpi_comm based on subworld partitioning + nrn_assert( + MPI_Comm_split(nrnmpi_world_comm, nrn_subworld_index, nrn_subworld_rank, &nrnmpi_comm) == + MPI_SUCCESS); + + // assert that rank order and size is consistent between NEURON and CoreNEURON + int subworld_rank{-1}; + nrn_assert(MPI_Comm_rank(nrnmpi_comm, &subworld_rank) == MPI_SUCCESS); + nrn_assert(nrn_subworld_rank == subworld_rank); + + int subworld_size{-1}; + nrn_assert(MPI_Comm_size(nrnmpi_comm, &subworld_size) == MPI_SUCCESS); + nrn_assert(subworld_size == nrn_mpi_numprocs_subworld); +} + + // check if appropriate threading level supported (i.e. MPI_THREAD_FUNNELED) void nrnmpi_check_threading_support_impl() { int th = 0; diff --git a/src/coreneuron/utils/utils.cpp b/src/coreneuron/utils/utils.cpp index d3c76194de..533e8dbbea 100644 --- a/src/coreneuron/utils/utils.cpp +++ b/src/coreneuron/utils/utils.cpp @@ -31,4 +31,10 @@ double nrn_wtime() { return (time1.tv_sec + time1.tv_usec / 1.e6); } } + +extern "C" { +void (*nrn2core_subworld_info_)(int&, int&, int&); +} + + } // namespace coreneuron diff --git a/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.cpp b/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.cpp index ff6f7b9239..0c9b0a9016 100644 --- a/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.cpp +++ b/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.cpp @@ -1221,3 +1221,17 @@ void nrn2core_patternstim(void** info) { assert(ml.nodecount == 1); *info = nrn_patternstim_info_ref(ml.pdata[0]); } + + +// Info from NEURON subworlds at beginning of psolve. +void nrn2core_subworld_info(int& cnt, + int& subworld_index, + int& subworld_rank, + int& numprocs_subworld, + int& numprocs_world) { + cnt = nrnmpi_subworld_change_cnt; + subworld_index = nrnmpi_subworld_id; + subworld_rank = nrnmpi_myid; + numprocs_subworld = nrnmpi_numprocs_subworld; + numprocs_world = nrnmpi_numprocs_world; +} diff --git a/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.h b/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.h index 0bd6e86fa7..df07d8ef64 100644 --- a/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.h +++ b/src/nrniv/nrncore_write/callbacks/nrncore_callbacks.h @@ -183,6 +183,13 @@ void nrn2core_PreSyn_flag(int tid, std::set& presyns_flag_true); // Direct transfer with respect to PatternStim void nrn2core_patternstim(void** info); +// Info from NEURON subworlds at beginning of psolve. +void nrn2core_subworld_info(int& cnt, + int& subworld_index, + int& subworld_rank, + int& subworld_size, + int& numprocs_world); + } // end of extern "C" static core2nrn_callback_t cnbs[] = { @@ -227,6 +234,8 @@ static core2nrn_callback_t cnbs[] = { {"nrn2core_patternstim_", (CNB) nrn2core_patternstim}, + {"nrn2core_subworld_info_", (CNB) nrn2core_subworld_info}, + {NULL, NULL}}; diff --git a/src/nrnmpi/nrnmpi.cpp b/src/nrnmpi/nrnmpi.cpp index e8facf8308..db7c54dfbe 100644 --- a/src/nrnmpi/nrnmpi.cpp +++ b/src/nrnmpi/nrnmpi.cpp @@ -145,6 +145,9 @@ for (i=0; i < *pargc; ++i) { nrnmpi_myid = nrnmpi_myid_bbs = nrnmpi_myid_world; nrnmpi_spike_initialize(); nrnmpi_use = 1; + nrnmpi_subworld_change_cnt = 0; // increment from within void nrnmpi_subworld_size(int n) + nrnmpi_subworld_id = 0; // Subworld index of current rank + nrnmpi_numprocs_subworld = nrnmpi_numprocs_bbs; // Size of subworld of current rank /*begin instrumentation*/ #if USE_HPM @@ -216,6 +219,8 @@ void nrnmpi_abort(int errcode) { } #if NRNMPI + + void nrnmpi_subworld_size(int n) { /* n is the (desired) size of a subworld (pc.nhost) */ /* A subworld (net) is contiguous */ @@ -254,6 +259,8 @@ void nrnmpi_subworld_size(int n) { asrt(MPI_Comm_size(nrnmpi_comm, &nrnmpi_numprocs)); asrt(MPI_Comm_rank(nrn_bbs_comm, &nrnmpi_myid_bbs)); asrt(MPI_Comm_size(nrn_bbs_comm, &nrnmpi_numprocs_bbs)); + nrnmpi_subworld_id = nrnmpi_myid_bbs; + nrnmpi_numprocs_subworld = nrnmpi_numprocs_bbs; } else if (n == nrnmpi_numprocs_world) { asrt(MPI_Group_incl(wg, 1, &r, &grp_bbs)); asrt(MPI_Comm_dup(nrnmpi_world_comm, &nrnmpi_comm)); @@ -267,6 +274,8 @@ void nrnmpi_subworld_size(int n) { nrnmpi_myid_bbs = -1; nrnmpi_numprocs_bbs = -1; } + nrnmpi_subworld_id = 0; + nrnmpi_numprocs_subworld = nrnmpi_numprocs; } else { int nw = nrnmpi_numprocs_world; int nb = nw / n; /* nrnmpi_numprocs_bbs */ @@ -300,15 +309,16 @@ void nrnmpi_subworld_size(int n) { asrt(MPI_Comm_rank(nrn_bbs_comm, &nrnmpi_myid_bbs)); asrt(MPI_Comm_size(nrn_bbs_comm, &nrnmpi_numprocs_bbs)); } else { -#if 1 nrnmpi_myid_bbs = -1; nrnmpi_numprocs_bbs = -1; -#else - nrnmpi_myid_bbs = r / n; - nrnmpi_numprocs_bbs = nb; -#endif + } + nrnmpi_subworld_id = r / n; + nrnmpi_numprocs_subworld = n; + if ((nw % n != 0) && (nrnmpi_subworld_id == (nb - 1))) { + nrnmpi_numprocs_subworld = nw % n; /* and the last will have pc.nhost = nw%n */ } } + nrnmpi_subworld_change_cnt++; asrt(MPI_Group_free(&wg)); } diff --git a/src/nrnmpi/nrnmpi_def_cinc b/src/nrnmpi/nrnmpi_def_cinc index b4c20dc6d8..d470eb34fb 100644 --- a/src/nrnmpi/nrnmpi_def_cinc +++ b/src/nrnmpi/nrnmpi_def_cinc @@ -5,6 +5,10 @@ int nrnmpi_numprocs_world = 1; int nrnmpi_myid_world = 0; int nrnmpi_numprocs_bbs = 1; int nrnmpi_myid_bbs = 0; +// increment from within void nrnmpi_subworld_size(int n) +int nrnmpi_subworld_change_cnt = 0; +int nrnmpi_subworld_id = -1; +int nrnmpi_numprocs_subworld = 1; int nrnmpi_nout_; int* nrnmpi_nin_; diff --git a/src/oc/nrnmpi.h b/src/oc/nrnmpi.h index c18669d8f4..69e4ee75a4 100644 --- a/src/oc/nrnmpi.h +++ b/src/oc/nrnmpi.h @@ -7,12 +7,15 @@ not easily coexist. ParallelContext.subworlds(nsmall) divides the world into nrnmpi_numprocs_world/small subworlds of size nsmall. */ -extern int nrnmpi_numprocs_world; /* size of entire world. total size of all subworlds */ -extern int nrnmpi_myid_world; /* rank in entire world */ -extern int nrnmpi_numprocs; /* size of subworld */ -extern int nrnmpi_myid; /* rank in subworld */ -extern int nrnmpi_numprocs_bbs; /* number of subworlds */ -extern int nrnmpi_myid_bbs; /* rank in nrn_bbs_comm of rank 0 of a subworld */ +extern int nrnmpi_numprocs_world; /* size of entire world. total size of all subworlds */ +extern int nrnmpi_myid_world; /* rank in entire world */ +extern int nrnmpi_numprocs; /* size of subworld */ +extern int nrnmpi_myid; /* rank in subworld */ +extern int nrnmpi_numprocs_bbs; /* number of subworlds */ +extern int nrnmpi_myid_bbs; /* rank in nrn_bbs_comm of rank 0 of a subworld */ +extern int nrnmpi_subworld_change_cnt; /* increment from within void nrnmpi_subworld_size(int n) */ +extern int nrnmpi_subworld_id; /* subworld index on all ranks */ +extern int nrnmpi_numprocs_subworld; /* number of ranks in subworld on all ranks */ typedef struct { int gid; diff --git a/src/parallel/bbs.cpp b/src/parallel/bbs.cpp index 553fd50d62..e04dd5cc04 100644 --- a/src/parallel/bbs.cpp +++ b/src/parallel/bbs.cpp @@ -56,7 +56,6 @@ void BBS::init(int) { if (!BBSImpl::started_) { BBSImpl::is_master_ = (nrnmpi_myid_bbs == 0) ? true : false; BBSImpl::master_works_ = true; - // printf("%d BBS::init is_master=%d\n", nrnmpi_myid_bbs, BBSImpl::is_master_); } // Just as with PVM which stored buffers on the bulletin board // so we have the following files to store MPI_PACKED buffers @@ -427,6 +426,12 @@ void BBSImpl::worker() { // forever request and execute commands double st, et; int id; + if (debug) { + printf("%d BBS::worker is_master=%d nrnmpi_myid = %d\n", + nrnmpi_myid_world, + is_master(), + nrnmpi_myid); + } if (!is_master()) { if (nrnmpi_myid_bbs == -1) { // wait for message from for (;;) { // the proper nrnmpi_myid == 0 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c2137ab565..8ffa4e8328 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -346,6 +346,16 @@ if(NRN_ENABLE_PYTHON AND PYTEST_FOUND) ${MPIEXEC_POSTFLAGS} -notatty -python) + set(modtests_launch_py_mpi_subworlds + ${MPIEXEC_NAME} + ${MPIEXEC_NUMPROC_FLAG} + 6 + ${MPIEXEC_OVERSUBSCRIBE} + ${MPIEXEC_PREFLAGS} + special + ${MPIEXEC_POSTFLAGS} + -notatty + -python) else() set(modtests_preload_sanitizer PRELOAD_SANITIZER) set(modtests_launch_py ${PYTHON_EXECUTABLE} ${pytest}) @@ -359,6 +369,15 @@ if(NRN_ENABLE_PYTHON AND PYTEST_FOUND) ${preload_sanitizer_mpiexec} ${PYTHON_EXECUTABLE} ${MPIEXEC_POSTFLAGS}) + set(modtests_launch_py_mpi_subworlds + ${MPIEXEC_NAME} + ${MPIEXEC_NUMPROC_FLAG} + 6 + ${MPIEXEC_OVERSUBSCRIBE} + ${MPIEXEC_PREFLAGS} + ${preload_sanitizer_mpiexec} + ${PYTHON_EXECUTABLE} + ${MPIEXEC_POSTFLAGS}) endif() # External coreneuron can be used for testing but for simplicity we are testing only submodule @@ -590,6 +609,15 @@ if(NRN_ENABLE_PYTHON AND PYTEST_FOUND) ${MPIEXEC_NAME} ${MPIEXEC_NUMPROC_FLAG} 2 ${MPIEXEC_OVERSUBSCRIBE} ${MPIEXEC_PREFLAGS} special ${MPIEXEC_POSTFLAGS} -mpi -python ${PROJECT_SOURCE_DIR}/test/coreneuron/test_inputpresyn.py) + nrn_add_test( + GROUP coreneuron_modtests + NAME test_subworlds_py_${processor} + REQUIRES coreneuron ${processor} ${modtests_preload_sanitizer} + SCRIPT_PATTERNS test/coreneuron/test_subworlds.py + PROCESSORS 6 + ENVIRONMENT ${modtests_processor_env} ${nrnpython_mpi_env} + COVERAGE_FILE=.coverage.coreneuron_test_subworlds_py + COMMAND ${modtests_launch_py_mpi_subworlds} test/coreneuron/test_subworlds.py) endif() nrn_add_test_group( NAME nmodl_tests_coreneuron diff --git a/test/coreneuron/test_subworlds.py b/test/coreneuron/test_subworlds.py new file mode 100644 index 0000000000..8fc4320ae9 --- /dev/null +++ b/test/coreneuron/test_subworlds.py @@ -0,0 +1,79 @@ +import sys +from neuron import h + +h("""objref cvode""") +h.cvode = h.CVode() + +use_coreneuron = True +subsize = 3 + +if use_coreneuron: + from neuron import coreneuron + + coreneuron.enable = True + coreneuron.verbose = 0 + + +def test_subworlds(): + h.nrnmpi_init() + pc = h.ParallelContext() + pc.subworlds(subsize) + rank = int(pc.id()) + nhost = int(pc.nhost()) + + ibbs = pc.id_world() // subsize + nbbs = pc.nhost_world() // subsize + x = 0 if pc.nhost_world() % subsize == 0 else 1 + assert pc.nhost_bbs() == ((nbbs + x) if pc.id() == 0 else -1) + assert pc.id_bbs() == (ibbs if pc.id() == 0 else -1) + + if rank == 0: + soma = h.Section(name="soma") + soma.L = 20 + soma.diam = 20 + soma.insert("hh") + iclamp = h.IClamp(soma(0.5)) + iclamp.delay = 2 + iclamp.dur = 0.1 + + if pc.id_bbs() == 0: + iclamp.amp = 0.0 + elif pc.id_bbs() == 1: + iclamp.amp = 0.9 + else: + raise RuntimeError(f"Invalid subworld index {pc.id_bbs()}") + + rec_v = h.Vector() + rec_t = h.Vector() + rec_v.record(soma(0.5)._ref_v) # Membrane potential vector + rec_t.record(h._ref_t) + syn = h.ExpSyn(soma(0.5)) + spike_detector = h.NetCon(soma(0.5)._ref_v, None, sec=soma) + netstim = h.NetStim() + netstim.number = 1 + netstim.start = 0 + nc = h.NetCon(netstim, syn) + gid = 1 + pc.set_gid2node(gid, pc.id()) + pc.cell(gid, spike_detector) + + h.cvode.cache_efficient(1) + h.finitialize(-65) + pc.set_maxstep(10) + pc.psolve(250.0) + if rank == 0: + if pc.id_bbs() == 0: + assert rec_v.max() < 0.0 + elif pc.id_bbs() == 1: + assert rec_v.max() > 0.0 + else: + raise RuntimeError(f"Invalid subworld index {pc.id_bbs()}") + + print(f"subworld {pc.id_bbs()}: {rec_v.max()}") + + pc.done() + h.quit() + + +if __name__ == "__main__": + test_subworlds() diff --git a/test/parallel_tests/test_subworld.py b/test/parallel_tests/test_subworld.py index 0f7dfaf9e4..fc7f4df3b9 100644 --- a/test/parallel_tests/test_subworld.py +++ b/test/parallel_tests/test_subworld.py @@ -1,5 +1,7 @@ from neuron import h +import sys +h.nrnmpi_init() pc = h.ParallelContext() # each subworld has 3 ranks if nhost_world is multiple of subsize @@ -13,7 +15,7 @@ if pc.id_world() == 0: print("id_world nhost_world id_bbs nhost_bbs ibbs nbbs id nhost") -pc.barrier() + print( "%5d %9d %8d %8d %7d %3d %5d %4d" % ( @@ -30,6 +32,7 @@ assert pc.nhost() == (subsize if ibbs < nbbs else (pc.nhost_world() - subsize * nbbs)) assert pc.id() == pc.id_world() % subsize + # id_bbs and nhost_bbs for non-zero id not -1. assert pc.nhost_bbs() == ((nbbs + x) if pc.id() == 0 else -1) assert pc.id_bbs() == (ibbs if pc.id() == 0 else -1)