Skip to content

Commit

Permalink
distributed wfc support / misc updates (#20)
Browse files Browse the repository at this point in the history
* remove enable_language(CUDA)

* enable distributed wave-functions

* update spack recipe for intel-oneapi-mkl

* cmake: simplify blas/mkl

* add check-format

* remove kokkos <4 initialization
  • Loading branch information
simonpintarelli committed Jun 6, 2024
1 parent 44daefa commit 674039f
Show file tree
Hide file tree
Showing 63 changed files with 816 additions and 742 deletions.
15 changes: 15 additions & 0 deletions .github/workflows/check-format.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# CI workflow: runs the repository's check_format.sh script on every push and
# pull request.
name: Check source code formatting

# No branch or path filters: both event types trigger the job.
on:
  push: {}
  pull_request: {}

jobs:
  check:
    runs-on: ubuntu-latest
    # Steps execute inside this container image.
    # NOTE(review): presumably chosen because it ships clang-format 17 -- confirm.
    container: zhongruoyu/llvm-ports:17.0.4-slim-focal
    steps:
      - uses: actions/checkout@v4
      - name: Check .cpp and .hpp files
        run: |
          ./check_format.sh
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@ spack-build-*
*~undo-tree~
__pycache__/
compile_commands.json
build-linux-*
26 changes: 18 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ set(USE_OPENMP On CACHE BOOL "use OpenMP")
set(USE_CUDA Off CACHE BOOL "use cuda")
set(USE_ROCM Off CACHE BOOL "use amd gpus")
set(USE_MAGMA Off CACHE BOOL "use magma eigensolver for amd gpus")
set(USE_GPU_DIRECT Off CACHE BOOL "use gpu direct")

set(BUILD_TESTS OFF CACHE BOOL "build tests")
set(LAPACK_VENDOR "OpenBLAS" CACHE STRING "lapack vendor")
Expand All @@ -21,22 +22,19 @@ endif()
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS "YES")

# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")

include(cmake/nlcglib_macros.cmake)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/modules")

find_package(Kokkos)
if(USE_CUDA)
find_package(CUDAToolkit REQUIRED)
enable_language(CUDA)
include(cmake/cudalibs_target.cmake)
endif()

if(USE_ROCM)
include(cmake/rocmlibs_target.cmake)
endif()
find_package(Kokkos)

if(USE_MAGMA)
find_package(MAGMA REQUIRED)
Expand All @@ -53,12 +51,24 @@ if(LAPACK_VENDOR MATCHES OpenBLAS)
INTERFACE_INCLUDE_DIRECTORIES "${OpenBLAS_INCLUDE_DIRS}"
INTERFACE_LINK_LIBRARIES "${OpenBLAS_LIBRARIES}")
endif()
elseif(LAPACK_VENDOR MATCHES MKL)
message("LAPACK VENDOR MKL")
find_package(MKL REQUIRED)
elseif(LAPACK_VENDOR STREQUAL MKLONEAPI)
# set(MKL_THREADING gnu_thread)
message("LAPACK VENDOR MKL")
set(MKL_INTERFACE "lp64" CACHE STRING "")
set(MKL_THREADING "sequential" CACHE STRING "")
set(MKL_MPI "mpich" CACHE STRING "")
find_package(MKL CONFIG REQUIRED)
if(NOT TARGET nlcg::cpu_lapack)
add_library(nlcg::cpu_lapack INTERFACE IMPORTED)
target_link_libraries(nlcg::cpu_lapack INTERFACE MKL::MKL)
target_compile_definitions(nlcg::cpu_lapack INTERFACE __USE_MKL)
endif()
elseif(LAPACK_VENDOR STREQUAL MKL)
find_package(MKL REQUIRED NO_MODULE)
if(NOT TARGET nlcg::cpu_lapack)
add_library(nlcg::cpu_lapack INTERFACE IMPORTED)
target_link_libraries(nlcg::cpu_lapack INTERFACE mkl::mkl_intel_32bit_omp_dyn)
target_compile_definitions(nlcg::cpu_lapack INTERFACE __USE_MKL)
endif()
elseif(LAPACK_VENDOR STREQUAL CRAY_LIBSCI)
message("LAPACK VENDOR Cray Libsci")
find_package(SCI REQUIRED)
Expand Down
15 changes: 15 additions & 0 deletions check_format.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

check_diff() {
    # Compare every file given as an argument with the output clang-format
    # produces for it. Returns 0 when all files match and 1 as soon as at
    # least one differs; all files are still checked so that every offender
    # is reported (diff -q prints a one-line notice per differing file).
    local rc=0
    local src
    for src in "$@"; do
        diff -q "$src" <(clang-format "$src") || rc=1
    done
    return "$rc"
}

# Export the function so the child bash process spawned by find can call it.
export -f check_diff

# Select all regular *.cpp / *.hpp files below the current directory, leaving
# out ./build-env and every path that starts with "./." (hidden top-level
# entries), and hand them in batches to check_diff. The literal "sh" fills $0
# of the child shell so that the file names arrive as "$@".
find . \
    -type f \
    \( -name "*.cpp" -o -name "*.hpp" \) \
    ! -path "./build-env/*" \
    ! -path "./.*" \
    -exec bash -c 'check_diff "$@"' sh {} +
28 changes: 8 additions & 20 deletions cmake/nlcglib_macros.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -2,42 +2,30 @@ MACRO(NLCGLIB_SETUP_TARGET _target)
target_link_libraries(
${_target} PUBLIC
Kokkos::kokkos
# ${LAPACK_LIBRARIES}
MPI::MPI_CXX
# $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>
$<TARGET_NAME_IF_EXISTS:nlcglib::cudalibs>
$<TARGET_NAME_IF_EXISTS:nlcglib::rocmlibs>
$<TARGET_NAME_IF_EXISTS:nlcglib::magma>
$<TARGET_NAME_IF_EXISTS:roc::hipblas> # only required for magma
$<TARGET_NAME_IF_EXISTS:roc::hipsparse> # only required for magma
nlohmann_json::nlohmann_json
)
nlcg::cpu_lapack
)

target_include_directories(${_target} PUBLIC
${CMAKE_SOURCE_DIR}/src
${CMAKE_SOURCE_DIR}/include
)
)

if(USE_ROCM)
target_compile_options(${_target} PUBLIC --offload-arch=gfx90a)
endif()

if(LAPACK_VENDOR MATCHES MKL)
target_compile_definitions(${_target} PUBLIC __USE_MKL)
# if(USE_OPENMP)
target_link_libraries(${_target} PUBLIC mkl::mkl_intel_32bit_omp_dyn)
# else()
# target_link_libraries(${_target} PUBLIC mkl::mkl_intel_32bit_seq_dyn)
# endif()
elseif(LAPACK_VENDOR STREQUAL MKLONEAPI)
target_link_libraries(${_target} PUBLIC MKL::MKL)
else()
target_link_libraries(${_target} PRIVATE nlcg::cpu_lapack)
if(USE_ROCM)
target_compile_options(${_target} PUBLIC --offload-arch=gfx90a)
endif()

target_compile_definitions(${_target} PUBLIC $<$<BOOL:${USE_OPENMP}>:__USE_OPENMP>)
target_compile_definitions(${_target} PUBLIC $<$<BOOL:${USE_CUDA}>:__NLCGLIB__CUDA>)
target_compile_definitions(${_target} PUBLIC $<$<BOOL:${USE_ROCM}>:__NLCGLIB__ROCM>)
target_compile_definitions(${_target} PUBLIC $<$<BOOL:${USE_MAGMA}>:__NLCGLIB__MAGMA>)
target_compile_definitions(${_target} PUBLIC $<$<BOOL:${USE_GPU_DIRECT}>:__NLCGLIB__GPU_DIRECT>)
target_include_directories(${_target} PUBLIC $<TARGET_PROPERTY:Kokkos::kokkoscore,INTERFACE_INCLUDE_DIRECTORIES>)

ENDMACRO()
32 changes: 15 additions & 17 deletions include/interface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

#include <array>
#include <complex>
#include <memory>
#include <functional>
#include <map>
#include <memory>
#include <stdexcept>
#include <functional>
#include <vector>
#include "mpi.h"

Expand All @@ -18,9 +18,8 @@ enum class memory_type
device
};

static std::map<memory_type, std::string> memory_names = {{memory_type::none, "none"},
{memory_type::host, "host"},
{memory_type::device, "device"}};
static std::map<memory_type, std::string> memory_names = {
{memory_type::none, "none"}, {memory_type::host, "host"}, {memory_type::device, "device"}};

enum class smearing_type
{
Expand Down Expand Up @@ -50,20 +49,18 @@ struct buffer_protocol
std::array<int, d> size,
T* data,
enum memory_type memtype,
MPI_Comm mpi_comm=MPI_COMM_SELF)
MPI_Comm mpi_comm = MPI_COMM_SELF)
: stride(std::move(stride))
, size(std::move(size))
, data(data)
, memtype(memtype)
, mpi_comm(mpi_comm)
{ /* empty */ }
{ /* empty */
}

// 1d constructor
// template<int k=dim, class=std::enable_if_t<k==1>>
buffer_protocol(int size,
T* data,
enum memory_type memtype,
MPI_Comm mpi_comm= MPI_COMM_SELF)
buffer_protocol(int size, T* data, enum memory_type memtype, MPI_Comm mpi_comm = MPI_COMM_SELF)
: buffer_protocol({1}, {size}, data, memtype, mpi_comm)
{
static_assert(d == 1, "not available.");
Expand All @@ -79,7 +76,7 @@ struct buffer_protocol
MPI_Comm mpi_comm{MPI_COMM_SELF};
};

template<int dim, class numeric_t>
template <int dim, class numeric_t>
class BufferBase
{
public:
Expand All @@ -100,7 +97,7 @@ class BufferBase
virtual kindex_t kpoint_index(int i) const = 0;
};

template<class numeric_t>
template <class numeric_t>
class BufferBase<0, numeric_t>
{
public:
Expand Down Expand Up @@ -140,23 +137,24 @@ class EnergyBase
virtual std::shared_ptr<MatrixBaseZ> get_sphi(memory_type) = 0;
virtual std::shared_ptr<MatrixBaseZ> get_C(memory_type) = 0;
virtual std::shared_ptr<VectorBaseZ> get_fn() = 0;
virtual void set_fn(const std::vector<std::pair<int, int>>&, const std::vector<std::vector<double>>&) = 0;
virtual void set_fn(const std::vector<std::pair<int, int>>&,
const std::vector<std::vector<double>>&) = 0;
virtual std::shared_ptr<VectorBaseZ> get_ek() = 0;
virtual std::shared_ptr<VectorBaseZ> get_gkvec_ekin() = 0;
virtual std::shared_ptr<ScalarBaseZ> get_kpoint_weights() = 0;
virtual void set_chemical_potential(double) = 0;
virtual double get_chemical_potential() = 0;
virtual void print_info() const = 0;
virtual MPI_Comm comm_world() const = 0;
};

class OpBase
{
public:
using key_t = std::pair<int, int>;

public:
virtual void apply(const key_t&,
MatrixBaseZ::buffer_t& out,
MatrixBaseZ::buffer_t& in) const = 0;
virtual void apply(const key_t&, MatrixBaseZ::buffer_t& out, MatrixBaseZ::buffer_t& in) const = 0;
virtual std::vector<key_t> get_keys() const = 0;
};

Expand Down
18 changes: 10 additions & 8 deletions include/nlcglib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

namespace nlcglib {

void initialize();
void finalize();
void
initialize();
void
finalize();

nlcg_info
nlcg_mvp2_cpu(EnergyBase& energy_base,
Expand All @@ -28,12 +30,12 @@ nlcg_mvp2_device(EnergyBase& energy_base,

nlcg_info
nlcg_mvp2_cpu_device(EnergyBase& energy_base,
smearing_type smearing,
double temp,
double tol,
double kappa,
double tau,
int maxiter,
smearing_type smearing,
double temp,
double tol,
double kappa,
double tau,
int maxiter,
int restart);
nlcg_info
nlcg_mvp2_device_cpu(EnergyBase& energy_base,
Expand Down
29 changes: 29 additions & 0 deletions spack/packages/nlcglib/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Nlcglib(CMakePackage, CudaPackage, ROCmPackage):
description="CMake build type",
values=("Debug", "Release", "RelWithDebInfo"),
)
variant("gpu_direct", default=False)

depends_on("cmake@3.21:", type="build")
depends_on("mpi")
Expand All @@ -38,6 +39,10 @@ class Nlcglib(CMakePackage, CudaPackage, ROCmPackage):

depends_on("googletest", type="build", when="+tests")
depends_on("nlohmann-json")
depends_on("kokkos@4:", when="@1.1:")

# MKLConfig.cmake introduced in 2021.3
conflicts("intel-oneapi-mkl@:2021.2", when="^intel-oneapi-mkl")

with when("@:0.9"):
conflicts("+rocm")
Expand All @@ -59,6 +64,7 @@ def cmake_args(self):
self.define_from_variant("USE_OPENMP", "openmp"),
self.define_from_variant("BUILD_TESTS", "tests"),
self.define_from_variant("USE_ROCM", "rocm"),
self.define_from_variant("USE_GPU_DIRECT", "gpu_direct"),
self.define_from_variant("USE_MAGMA", "magma"),
self.define_from_variant("USE_CUDA", "cuda"),
]
Expand All @@ -67,6 +73,29 @@ def cmake_args(self):
options += [self.define("LAPACK_VENDOR", "MKL")]
elif self.spec["blas"].name in ["intel-oneapi-mkl"]:
options += [self.define("LAPACK_VENDOR", "MKLONEAPI")]
mkl_mapper = {
"threading": {
"none": "sequential",
"openmp": "gnu_thread",
"tbb": "tbb_thread",
},
"mpi": {"intel-mpi": "intelmpi", "mpich": "mpich", "openmpi": "openmpi"},
}

mkl_threads = mkl_mapper["threading"][self.spec["intel-oneapi-mkl"].variants["threads"].value]

mpi_provider = self.spec["mpi"].name
if mpi_provider in ["mpich", "cray-mpich", "mvapich", "mvapich2"]:
mkl_mpi = mkl_mapper["mpi"]["mpich"]
else:
mkl_mpi = mkl_mapper["mpi"][mpi_provider]

options.extend([
self.define("MKL_INTERFACE", "lp64"),
self.define("MKL_THREADING", mkl_threads),
self.define("MKL_MPI", mkl_mpi)
])

elif self.spec["blas"].name in ["openblas"]:
options += [self.define("LAPACK_VENDOR", "OpenBLAS")]
else:
Expand Down
11 changes: 0 additions & 11 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,6 @@ add_library(nlcglib SHARED nlcglib.cpp)
target_include_directories(nlcglib PUBLIC $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include>)
target_link_libraries(nlcglib PRIVATE nlcglib_core)

if(LAPACK_VENDOR MATCHES MKL)
target_compile_definitions(nlcglib PRIVATE __USE_MKL)
if(USE_OPENMP)
target_link_libraries(nlcglib PRIVATE mkl::mkl_intel_32bit_omp_dyn)
else()
target_link_libraries(nlcglib PRIVATE mkl::mkl_intel_32bit_seq_st)
endif()
else()
target_link_libraries(nlcglib PRIVATE nlcg::cpu_lapack)
endif()

set_target_properties(nlcglib PROPERTIES PUBLIC_HEADER
${CMAKE_SOURCE_DIR}/include/nlcglib.hpp
)
Expand Down
6 changes: 3 additions & 3 deletions src/constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@ namespace nlcglib {

namespace constants {
const double pi{3.1415926535897932385};
} // constants
} // namespace constants

namespace physical_constants {
const double kb{0.00000316681156340226};
} // physical_constants
} // namespace physical_constants

} // nlcglib
} // namespace nlcglib
4 changes: 2 additions & 2 deletions src/dft/newton_minimization_smearing.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ double
newton_minimization_chemical_potential(
Nt&& N, DNt&& dN, D2Nt&& ddN, double mu0, double ne, double tol, int maxstep = 1000)
{
// Newton finds the minimum, not necessarily N(mu) == ne, tolerate up to `tol_ne` difference in number of electrons
// if |N(mu_0) -ne| > tol_ne an error is thrown.
// Newton finds the minimum, not necessarily N(mu) == ne; tolerate up to `tol_ne` difference in
// number of electrons. If |N(mu_0) - ne| > tol_ne, an error is thrown.
const double tol_ne = 1e-2;

double mu = mu0;
Expand Down
2 changes: 1 addition & 1 deletion src/exceptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ class DescentError : public std::exception
};


} // nlcglib
} // namespace nlcglib
4 changes: 2 additions & 2 deletions src/exec_space.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ template <>
struct exec<Kokkos::HostSpace>
{
#ifdef __USE_OPENMP
using type = Kokkos::OpenMP;
using type = Kokkos::OpenMP;
#else
using type = Kokkos::Serial;
#endif
Expand All @@ -40,4 +40,4 @@ template <class SPACE>
using exec_t = typename exec<SPACE>::type;


} // nlcglib
} // namespace nlcglib
Loading

0 comments on commit 674039f

Please sign in to comment.