From 72170bd853fc5d9842c4e20586db748dd2386848 Mon Sep 17 00:00:00 2001
From: "kodiakhq[bot]" <49736102+kodiakhq[bot]@users.noreply.github.com>
Date: Tue, 18 Jul 2023 13:50:02 +0000
Subject: [PATCH] Improve benchmark scripts (#4753)

Follow up to our work with HPC Vega and the EESSI project.

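Description of changes:
- add GPU support in the P3M and LB benchmarks (new `--gpu`/`--no-gpu`
  options; the CUDA feature is required when `--gpu` is set)
- remove MASS dependency in P3M
- adjust steepest descent for large particle numbers
- change P3M benchmark parameters: default prefactor 4 -> 1,
  accuracy 1e-4 -> 1e-3, skin 0.4 -> 0.5
- run the LB and P3M benchmark tests in separate CPU and GPU variants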
---
 maintainer/benchmarks/lb.py                   | 21 ++++++------
 maintainer/benchmarks/lj.py                   |  4 +--
 maintainer/benchmarks/p3m.py                  | 34 +++++++++++--------
 testsuite/scripts/benchmarks/CMakeLists.txt   |  6 ++--
 .../scripts/benchmarks/test_ferrofluid.py     |  2 ++
 testsuite/scripts/benchmarks/test_lb.py       |  9 +++--
 testsuite/scripts/benchmarks/test_lj.py       |  2 ++
 testsuite/scripts/benchmarks/test_p3m.py      |  9 +++--
 8 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/maintainer/benchmarks/lb.py b/maintainer/benchmarks/lb.py
index 6aede3a3239..f41ace68598 100644
--- a/maintainer/benchmarks/lb.py
+++ b/maintainer/benchmarks/lb.py
@@ -38,6 +38,8 @@
                     type=float, default=0.03, required=False,
                     help="Fraction of the simulation box volume occupied by "
                     "particles (range: [0.01-0.74], default: 0.50)")
+parser.add_argument("--gpu", default=False, action="store_true")
+parser.add_argument("--no-gpu", dest="gpu", action="store_false")
 parser.add_argument("--output", metavar="FILEPATH", action="store",
                     type=str, required=False, default="benchmarks.csv",
                     help="Output file (default: benchmarks.csv)")
@@ -51,6 +53,8 @@
     "volume_fraction exceeds the physical limit of sphere packing (~0.74)"
 
 required_features = ["LENNARD_JONES"]
+if args.gpu:
+    required_features.append("CUDA")
 espressomd.assert_features(required_features)
 
 # System
@@ -66,8 +70,7 @@
 
 # System parameters
 #############################################################
-
-n_proc = system.cell_system.get_state()['n_nodes']
+n_proc = system.cell_system.get_state()["n_nodes"]
 n_part = n_proc * args.particles_per_core
 # volume of N spheres with radius r: N * (4/3*pi*r^3)
 box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
@@ -84,7 +87,6 @@
 #############################################################
 system.time_step = 0.01
 system.cell_system.skin = 0.5
-system.thermostat.turn_off()
 
 # Interaction setup
 #############################################################
@@ -121,14 +123,11 @@
 system.thermostat.turn_off()
 print(f"LB shape: [{lb_grid}, {lb_grid}, {lb_grid}]")
 print(f"LB agrid: {agrid:.3f}")
-if hasattr(espressomd.lb, "LBFluid"):
-    LBClass = espressomd.lb.LBFluid
-elif hasattr(espressomd.lb, "LBFluidWalberla"):
-    LBClass = espressomd.lb.LBFluidWalberla
-else: 
-    raise Exception("LB not built in")
-
-lbf = LBClass(agrid=agrid, dens=1, visc=1, tau=system.time_step, kT=1, seed=1)
+
+lb_class = espressomd.lb.LBFluid
+if args.gpu:
+    lb_class = espressomd.lb.LBFluidGPU
+lbf = lb_class(agrid=agrid, dens=1, visc=1, tau=system.time_step, kT=1, seed=1)
 system.actors.add(lbf)
 system.thermostat.set_lb(gamma=10, LB_fluid=lbf, seed=2)
 
diff --git a/maintainer/benchmarks/lj.py b/maintainer/benchmarks/lj.py
index a32737e02e2..c8d3fa9c074 100644
--- a/maintainer/benchmarks/lj.py
+++ b/maintainer/benchmarks/lj.py
@@ -73,8 +73,7 @@
 
 # System parameters
 #############################################################
-
-n_proc = system.cell_system.get_state()['n_nodes']
+n_proc = system.cell_system.get_state()["n_nodes"]
 n_part = n_proc * args.particles_per_core
 # volume of N spheres with radius r: N * (4/3*pi*r^3)
 box_l = (n_part * 4. / 3. * np.pi * (lj_sig / 2.)**3
@@ -88,7 +87,6 @@
 #############################################################
 system.time_step = 0.01
 system.cell_system.skin = 0.5
-system.thermostat.turn_off()
 
 # Interaction setup
 #############################################################
diff --git a/maintainer/benchmarks/p3m.py b/maintainer/benchmarks/p3m.py
index c511376be99..beb9e75ea55 100644
--- a/maintainer/benchmarks/p3m.py
+++ b/maintainer/benchmarks/p3m.py
@@ -33,8 +33,10 @@
                     help="Fraction of the simulation box volume occupied by "
                     "particles (range: [0.01-0.74], default: 0.25)")
 parser.add_argument("--prefactor", metavar="PREFACTOR", action="store",
-                    type=float, default=4., required=False,
-                    help="P3M prefactor (default: 4)")
+                    type=float, default=1., required=False,
+                    help="P3M prefactor (default: 1)")
+parser.add_argument("--gpu", default=False, action="store_true")
+parser.add_argument("--no-gpu", dest="gpu", action="store_false")
 group = parser.add_mutually_exclusive_group()
 group.add_argument("--output", metavar="FILEPATH", action="store",
                    type=str, required=False, default="benchmarks.csv",
@@ -55,7 +57,9 @@
     assert measurement_steps >= 50, \
         f"{measurement_steps} steps per tick are too short"
 
-required_features = ["P3M", "LENNARD_JONES", "MASS"]
+required_features = ["P3M", "LENNARD_JONES"]
+if args.gpu:
+    required_features.append("CUDA")
 espressomd.assert_features(required_features)
 
 # make simulation deterministic
@@ -67,7 +71,6 @@
 
 # Interaction parameters (Lennard-Jones, Coulomb)
 #############################################################
-
 species = ["anion", "cation"]
 types = {"anion": 0, "cation": 0}
 charges = {"anion": -1.0, "cation": 1.0}
@@ -76,12 +79,10 @@
 WCA_cut = 2.**(1. / 6.)
 lj_cuts = {"anion": WCA_cut * lj_sigmas["anion"],
            "cation": WCA_cut * lj_sigmas["cation"]}
-masses = {"anion": 1.0, "cation": 1.0}
 
 # System parameters
 #############################################################
-
-n_proc = system.cell_system.get_state()['n_nodes']
+n_proc = system.cell_system.get_state()["n_nodes"]
 n_part = n_proc * args.particles_per_core
 # volume of N spheres with radius r: N * (4/3*pi*r^3)
 lj_sig = (lj_sigmas["cation"] + lj_sigmas["anion"]) / 2
@@ -96,12 +97,10 @@
 # Integration parameters
 #############################################################
 system.time_step = 0.01
-system.cell_system.skin = .4
-system.thermostat.turn_off()
+system.cell_system.skin = 0.5
 
 # Interaction setup
 #############################################################
-
 for i in range(len(species)):
     ion1 = species[i]
     for j in range(i, len(species)):
@@ -115,26 +114,32 @@
 
 # Particle setup
 #############################################################
-
+pid = 0
 for i in range(0, n_part, len(species)):
     for t in species:
         system.part.add(pos=np.random.random(3) * system.box_l,
-                        q=charges[t], type=types[t], mass=masses[t])
+                        id=pid, q=charges[t], type=types[t])
+        pid += 1
 
 #  Warmup Integration
 #############################################################
 
 # warmup
-benchmarks.minimize(system, n_part / 10.)
+benchmarks.minimize(system, n_part / 2.)
 
 system.integrator.set_vv()
 system.thermostat.set_langevin(kT=1.0, gamma=1.0, seed=42)
 
+p3m_class = espressomd.electrostatics.P3M
+if args.gpu:
+    p3m_class = espressomd.electrostatics.P3MGPU
+
 # tuning and equilibration
 min_skin = 0.2
 max_skin = 1.6
-p3m_params = {'prefactor': args.prefactor, 'accuracy': 1e-4}
-print("Equilibration")
+p3m_params = {"prefactor": args.prefactor, "accuracy": 1e-3}
+p3m = p3m_class(**p3m_params)
+print("Quick equilibration")
 system.integrator.run(min(3 * measurement_steps, 1000))
 print("Tune skin: {:.3f}".format(system.cell_system.tune_skin(
     min_skin=min_skin, max_skin=max_skin, tol=0.05, int_steps=100,
@@ -142,7 +147,6 @@
 print("Equilibration")
 system.integrator.run(min(3 * measurement_steps, 3000))
 print("Tune p3m")
-p3m = espressomd.electrostatics.P3M(**p3m_params)
 system.actors.add(p3m)
 print("Equilibration")
 system.integrator.run(min(3 * measurement_steps, 3000))
diff --git a/testsuite/scripts/benchmarks/CMakeLists.txt b/testsuite/scripts/benchmarks/CMakeLists.txt
index e6e77411dee..c5880959cb1 100644
--- a/testsuite/scripts/benchmarks/CMakeLists.txt
+++ b/testsuite/scripts/benchmarks/CMakeLists.txt
@@ -41,8 +41,10 @@ add_custom_target(
           ${CMAKE_SOURCE_DIR}/maintainer/benchmarks ${BENCHMARKS_DIR})
 
 benchmark_test(FILE test_lj.py)
-benchmark_test(FILE test_lb.py)
-benchmark_test(FILE test_p3m.py)
+benchmark_test(FILE test_lb.py SUFFIX cpu)
+benchmark_test(FILE test_lb.py SUFFIX gpu LABELS "gpu")
+benchmark_test(FILE test_p3m.py SUFFIX cpu)
+benchmark_test(FILE test_p3m.py SUFFIX gpu LABELS "gpu")
 benchmark_test(FILE test_ferrofluid.py)
 benchmark_test(FILE test_mc_acid_base_reservoir.py)
 
diff --git a/testsuite/scripts/benchmarks/test_ferrofluid.py b/testsuite/scripts/benchmarks/test_ferrofluid.py
index 10dc522c7d0..fde6e4289c5 100644
--- a/testsuite/scripts/benchmarks/test_ferrofluid.py
+++ b/testsuite/scripts/benchmarks/test_ferrofluid.py
@@ -1,3 +1,4 @@
+#
 # Copyright (C) 2019-2022 The ESPResSo project
 #
 # This file is part of ESPResSo.
@@ -14,6 +15,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 
 import unittest as ut
 import importlib_wrapper
diff --git a/testsuite/scripts/benchmarks/test_lb.py b/testsuite/scripts/benchmarks/test_lb.py
index fd15a7cab57..97d630b654c 100644
--- a/testsuite/scripts/benchmarks/test_lb.py
+++ b/testsuite/scripts/benchmarks/test_lb.py
@@ -1,3 +1,4 @@
+#
 # Copyright (C) 2019-2022 The ESPResSo project
 #
 # This file is part of ESPResSo.
@@ -14,6 +15,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 
 import unittest as ut
 import importlib_wrapper
@@ -22,9 +24,12 @@
 # make simulation deterministic
 np.random.seed(42)
 
+gpu = "gpu" in "@TEST_LABELS@".split(";")
+cmd_arguments = ["--particles_per_core", "80", "--gpu" if gpu else "--no-gpu"]
 benchmark, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
-    "@BENCHMARKS_DIR@/lb.py", cmd_arguments=["--particles_per_core", "80"],
-    measurement_steps=200, n_iterations=2, min_skin=0.688, max_skin=0.688)
+    "@BENCHMARKS_DIR@/lb.py", gpu=gpu, measurement_steps=200, n_iterations=2,
+    min_skin=0.688, max_skin=0.688,
+    cmd_arguments=cmd_arguments, script_suffix="@TEST_SUFFIX@")
 
 
 @skipIfMissingFeatures
diff --git a/testsuite/scripts/benchmarks/test_lj.py b/testsuite/scripts/benchmarks/test_lj.py
index d65ce8ed0fb..1f7a832b205 100644
--- a/testsuite/scripts/benchmarks/test_lj.py
+++ b/testsuite/scripts/benchmarks/test_lj.py
@@ -1,3 +1,4 @@
+#
 # Copyright (C) 2019-2022 The ESPResSo project
 #
 # This file is part of ESPResSo.
@@ -14,6 +15,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 
 import unittest as ut
 import importlib_wrapper
diff --git a/testsuite/scripts/benchmarks/test_p3m.py b/testsuite/scripts/benchmarks/test_p3m.py
index fc4247a5046..c5b1f5631f0 100644
--- a/testsuite/scripts/benchmarks/test_p3m.py
+++ b/testsuite/scripts/benchmarks/test_p3m.py
@@ -1,3 +1,4 @@
+#
 # Copyright (C) 2019-2022 The ESPResSo project
 #
 # This file is part of ESPResSo.
@@ -14,6 +15,7 @@
 #
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
 
 import unittest as ut
 import importlib_wrapper
@@ -22,9 +24,12 @@
 # make simulation deterministic
 np.random.seed(42)
 
+gpu = "gpu" in "@TEST_LABELS@".split(";")
+cmd_arguments = ["--particles_per_core", "400", "--gpu" if gpu else "--no-gpu"]
 benchmark, skipIfMissingFeatures = importlib_wrapper.configure_and_import(
-    "@BENCHMARKS_DIR@/p3m.py", cmd_arguments=["--particles_per_core", "400"],
-    measurement_steps=100, n_iterations=2, min_skin=0.262, max_skin=0.262,
+    "@BENCHMARKS_DIR@/p3m.py", gpu=gpu, measurement_steps=100, n_iterations=2,
+    cmd_arguments=cmd_arguments, script_suffix="@TEST_SUFFIX@",
+    min_skin=0.262, max_skin=0.262,
     p3m_params={'prefactor': 4, 'accuracy': 1e-4, 'cao': 7, 'r_cut': 3.182,
                 'mesh': [24, 24, 24], 'alpha': 1.02742, 'tune': False})