Skip to content

Commit

Permalink
Merge #2719
Browse files Browse the repository at this point in the history
2719: BH-gpu magnetostatics: multiple CPU support and refactoring r=fweik a=psci2195

Fixes #2500
In general, this fix is important cause it allows a user to parallelize both CPU and GPU calculations: EOM integration and magnetostatics respectively.

Description of changes:
 - Correct dipole prefactor broadcasting among the nodes
 - New test dds-and-bh-gpu.py is running over 4 CPUs: a comparison between BH and DDS gpu calculations. A new test has been needed because we cannot use dds-cpu as a baseline cause it does not support multiple CPUs. 
- Some technical debt has been caught up here as well: smart pointers based BH, the AMD GPU skip cleanup, per node test seeding. 

PR Checklist
------------
 - [ ] Tests?
   - [ ] Interface
   - [ ] Core 
 - [ ] Docs?


Co-authored-by: Bogdan Tanygin <b.m.tanygin@gmail.com>
Co-authored-by: Brian Tanygin <b.m.tanygin@gmail.com>
  • Loading branch information
bors[bot] and bogdan-tanygin committed Apr 13, 2019
2 parents 51e344c + 21f4bb1 commit 01d1b12
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 17 deletions.
23 changes: 10 additions & 13 deletions src/core/actor/DipolarBarnesHut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,25 @@

#ifdef DIPOLAR_BARNES_HUT

std::unique_ptr<DipolarBarnesHut> dipolarBarnesHut;

void activate_dipolar_barnes_hut(float epssq, float itolsq) {
delete dipolarBarnesHut;
dipolarBarnesHut = nullptr;
// also necessary on 1 CPU or GPU, does more than just broadcasting
dipole.method = DIPOLAR_BH_GPU;
mpi_bcast_coulomb_params();
dipolarBarnesHut =
new DipolarBarnesHut(espressoSystemInterface, epssq, itolsq);
forceActors.push_back(dipolarBarnesHut);
energyActors.push_back(dipolarBarnesHut);

dipole.method = DIPOLAR_BH_GPU;
dipolarBarnesHut = std::make_unique<DipolarBarnesHut>(espressoSystemInterface,
epssq, itolsq);
forceActors.push_back(dipolarBarnesHut.get());
energyActors.push_back(dipolarBarnesHut.get());
}

void deactivate_dipolar_barnes_hut() {
if (dipolarBarnesHut) {
forceActors.remove(dipolarBarnesHut);
energyActors.remove(dipolarBarnesHut);
forceActors.remove(dipolarBarnesHut.get());
energyActors.remove(dipolarBarnesHut.get());
dipolarBarnesHut.reset();
}
delete dipolarBarnesHut;
dipolarBarnesHut = nullptr;
}

DipolarBarnesHut *dipolarBarnesHut = nullptr;

#endif
2 changes: 1 addition & 1 deletion src/core/actor/DipolarBarnesHut.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class DipolarBarnesHut : public Actor {
void activate_dipolar_barnes_hut(float epssq, float itolsq);
void deactivate_dipolar_barnes_hut();

extern DipolarBarnesHut *dipolarBarnesHut;
extern std::unique_ptr<DipolarBarnesHut> dipolarBarnesHut;

#endif

Expand Down
1 change: 1 addition & 0 deletions testsuite/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ python_test(FILE coulomb_tuning.py MAX_NUM_PROC 4 LABELS gpu)
python_test(FILE correlation.py MAX_NUM_PROC 4)
python_test(FILE dawaanr-and-dds-gpu.py MAX_NUM_PROC 1 LABELS gpu)
python_test(FILE dawaanr-and-bh-gpu.py MAX_NUM_PROC 1 LABELS gpu)
python_test(FILE dds-and-bh-gpu.py MAX_NUM_PROC 4 LABELS gpu)
python_test(FILE electrostaticInteractions.py MAX_NUM_PROC 2)
python_test(FILE engine_langevin.py MAX_NUM_PROC 4)
python_test(FILE engine_lb.py MAX_NUM_PROC 2)
Expand Down
3 changes: 0 additions & 3 deletions testsuite/python/dawaanr-and-bh-gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,6 @@ def run_test_case(self):
self.system.part.clear()

def test(self):
if str(espressomd.cuda_init.CudaInitHandle().device_list[0]) == "Device 687f":
print("Test skipped on AMD card")
return
if (self.system.cell_system.get_state()["n_nodes"] > 1):
print("NOTE: Ignoring testcase for n_nodes > 1")
else:
Expand Down
160 changes: 160 additions & 0 deletions testsuite/python/dds-and-bh-gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Copyright (C) 2010-2018 The ESPResSo project
#
# This file is part of ESPResSo.
#
# ESPResSo is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# ESPResSo is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import print_function

import math
import unittest as ut
import numpy as np
from numpy import linalg as la
from numpy.random import random, seed

import espressomd
import espressomd.magnetostatics
import espressomd.analyze
import espressomd.cuda_init
import tests_common


def stopAll(system):
system.part[:].v = np.zeros(3)
system.part[:].omega_body = np.zeros(3)


@ut.skipIf(not espressomd.gpu_available() or not espressomd.has_features(["DIPOLAR_BARNES_HUT"]), "Features or gpu not available, skipping test!")
class BH_DDS_gpu_multCPU_test(ut.TestCase):
system = espressomd.System(box_l=[1, 1, 1])
# just some seeding based on 14
system.seed = [s * 14 for s in range(
system.cell_system.get_state()["n_nodes"])]
# just some seeding different from the previous one
np.random.seed(71)

def vectorsTheSame(self, a, b):
tol = 5E-2
vec_len = la.norm(a - b)
rel = 2 * vec_len / (la.norm(a) + la.norm(b))
return rel <= tol

def run_test_case(self):
pf_bh_gpu = 2.34
pf_dds_gpu = 3.524
ratio_dawaanr_bh_gpu = pf_dds_gpu / pf_bh_gpu
l = 15
self.system.box_l = [l, l, l]
self.system.periodicity = [0, 0, 0]
self.system.time_step = 1E-4
self.system.cell_system.skin = 0.1

part_dip = np.zeros((3))

for n in [128, 541]:
dipole_modulus = 1.3
# scale the box for a large number of particles:
if n > 1000:
l *= (n / 541) ** (1 / 3.0)
for i in range(n):
part_pos = np.array(random(3)) * l
costheta = 2 * random() - 1
sintheta = np.sin(np.arccos(costheta))
phi = 2 * np.pi * random()
part_dip[0] = sintheta * np.cos(phi) * dipole_modulus
part_dip[1] = sintheta * np.sin(phi) * dipole_modulus
part_dip[2] = costheta * dipole_modulus
self.system.part.add(id=i, type=0, pos=part_pos, dip=part_dip, v=np.array(
[0, 0, 0]), omega_body=np.array([0, 0, 0]))

self.system.non_bonded_inter[0, 0].lennard_jones.set_params(
epsilon=10.0, sigma=0.5,
cutoff=0.55, shift="auto")
self.system.thermostat.set_langevin(kT=0.0, gamma=10.0, seed=42)
stopAll(self.system)
self.system.integrator.set_vv()

self.system.non_bonded_inter[0, 0].lennard_jones.set_params(
epsilon=0.0, sigma=0.0,
cutoff=-1, shift=0.0)

self.system.cell_system.skin = 0.0
self.system.time_step = 0.01
self.system.thermostat.turn_off()

# gamma should be zero in order to avoid the noise term in force
# and torque
self.system.thermostat.set_langevin(kT=1.297, gamma=0.0)

dds_gpu = espressomd.magnetostatics.DipolarDirectSumGpu(
prefactor=pf_dds_gpu)
self.system.actors.add(dds_gpu)
self.system.integrator.run(steps=0, recalc_forces=True)

dawaanr_f = []
dawaanr_t = []

for i in range(n):
dawaanr_f.append(self.system.part[i].f)
dawaanr_t.append(self.system.part[i].torque_lab)
dawaanr_e = espressomd.analyze.Analysis(
self.system).energy()["total"]

del dds_gpu
self.system.actors.clear()

self.system.integrator.run(steps=0, recalc_forces=True)
bh_gpu = espressomd.magnetostatics.DipolarBarnesHutGpu(
prefactor=pf_bh_gpu, epssq=200.0, itolsq=8.0)
self.system.actors.add(bh_gpu)
self.system.integrator.run(steps=0, recalc_forces=True)

bhgpu_f = []
bhgpu_t = []

for i in range(n):
bhgpu_f.append(self.system.part[i].f)
bhgpu_t.append(self.system.part[i].torque_lab)
bhgpu_e = espressomd.analyze.Analysis(
self.system).energy()["total"]

# compare
for i in range(n):
self.assertTrue(
self.vectorsTheSame(
np.array(
dawaanr_t[i]), ratio_dawaanr_bh_gpu * np.array(
bhgpu_t[i])),
msg='Torques on particle do not match. i={0} dawaanr_t={1} ratio_dawaanr_bh_gpu*bhgpu_t={2}'.format(i, np.array(dawaanr_t[i]), ratio_dawaanr_bh_gpu * np.array(bhgpu_t[i])))
self.assertTrue(
self.vectorsTheSame(
np.array(
dawaanr_f[i]), ratio_dawaanr_bh_gpu * np.array(
bhgpu_f[i])),
msg='Forces on particle do not match: i={0} dawaanr_f={1} ratio_dawaanr_bh_gpu*bhgpu_f={2}'.format(i, np.array(dawaanr_f[i]), ratio_dawaanr_bh_gpu * np.array(bhgpu_f[i])))
self.assertTrue(
abs(dawaanr_e - bhgpu_e * ratio_dawaanr_bh_gpu) <= abs(
1E-3 * dawaanr_e),
msg='Energies for dawaanr {0} and bh_gpu {1} do not match.'.format(dawaanr_e, ratio_dawaanr_bh_gpu * bhgpu_e))

self.system.integrator.run(steps=0, recalc_forces=True)

del bh_gpu
self.system.actors.clear()
self.system.part.clear()

def test(self):
self.run_test_case()

if __name__ == '__main__':
ut.main()

0 comments on commit 01d1b12

Please sign in to comment.