From 78307ffe76b20a56848869486f8417b10880722c Mon Sep 17 00:00:00 2001 From: ptaylor Date: Thu, 20 Jul 2023 14:27:13 -0700 Subject: [PATCH] infer number of CUDA archs and adjust --parallel and --threads options in builds --- features/src/rapids-build-utils/.bashrc | 2 +- .../devcontainer-feature.json | 2 +- features/src/rapids-build-utils/install.sh | 1 + .../bin/get-jobs-and-archs.sh | 52 +++++++++++++++++++ .../bin/parse-cmake-var-from-args.sh | 8 +-- .../bin/tmpl/cpp-build.tmpl.sh | 14 +++-- .../bin/tmpl/python-build.tmpl.sh | 10 +++- 7 files changed, 80 insertions(+), 9 deletions(-) create mode 100755 features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-jobs-and-archs.sh diff --git a/features/src/rapids-build-utils/.bashrc b/features/src/rapids-build-utils/.bashrc index 71362957..89a8f885 100644 --- a/features/src/rapids-build-utils/.bashrc +++ b/features/src/rapids-build-utils/.bashrc @@ -1,7 +1,7 @@ export CONDA_ALWAYS_YES="true"; export CC="${CC:-"$(which gcc)"}"; export CXX="${CXX:-"$(which g++)"}"; -export CUDAARCHS="${CUDAARCHS:-native}"; +export CUDAARCHS="${CUDAARCHS:-all-major}"; export CUDAHOSTCXX="${CUDAHOSTCXX:-"${CXX}"}"; export CMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE:-Release}"; export CMAKE_EXPORT_COMPILE_COMMANDS="${CMAKE_EXPORT_COMPILE_COMMANDS:-ON}"; diff --git a/features/src/rapids-build-utils/devcontainer-feature.json b/features/src/rapids-build-utils/devcontainer-feature.json index 2573a7d7..508028f0 100644 --- a/features/src/rapids-build-utils/devcontainer-feature.json +++ b/features/src/rapids-build-utils/devcontainer-feature.json @@ -1,7 +1,7 @@ { "name": "NVIDIA RAPIDS devcontainer build utilities", "id": "rapids-build-utils", - "version": "23.8.4", + "version": "23.8.5", "description": "A feature to install the RAPIDS devcontainer build utilities", "containerEnv": { "BASH_ENV": "/etc/bash.bash_env" diff --git a/features/src/rapids-build-utils/install.sh b/features/src/rapids-build-utils/install.sh index 71e7a5a8..d3c79bf2 100644 --- a/features/src/rapids-build-utils/install.sh +++ b/features/src/rapids-build-utils/install.sh @@ -43,6 +43,7 @@ update-alternatives --install /usr/bin/rapids-parse-cmake-vars-from-args rapids- update-alternatives --install /usr/bin/rapids-python-pkg-roots rapids-python-pkg-roots /opt/rapids-build-utils/bin/python-pkg-roots.sh 0; update-alternatives --install /usr/bin/rapids-python-pkg-names rapids-python-pkg-names /opt/rapids-build-utils/bin/python-pkg-names.sh 0; update-alternatives --install /usr/bin/rapids-python-conda-pkg-names rapids-python-conda-pkg-names /opt/rapids-build-utils/bin/python-conda-pkg-names.sh 0; +update-alternatives --install /usr/bin/rapids-get-jobs-and-archs rapids-get-jobs-and-archs /opt/rapids-build-utils/bin/get-jobs-and-archs.sh 0; find /opt/rapids-build-utils \ \( -type d -exec chmod 0775 {} \; \ diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-jobs-and-archs.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-jobs-and-archs.sh new file mode 100755 index 00000000..0a6c2d2e --- /dev/null +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/get-jobs-and-archs.sh @@ -0,0 +1,52 @@ +#! /usr/bin/env bash + +jobs_and_archs() { + set -euo pipefail + + local free_mem=$(free -g | head -n2 | tail -n1 | cut -d ':' -f2 | tr -s '[:space:]' | cut -d' ' -f7); + local max_cpus=$((free_mem / 4)); + local all_cpus=$(nproc); + + local jobs="${JOBS:-${PARALLEL_LEVEL:-$(( all_cpus < max_cpus ? all_cpus : max_cpus ))}}"; + local archs=$(rapids-parse-cmake-var-from-args CMAKE_CUDA_ARCHITECTURES "$@"); + local archs="${archs:-${CMAKE_CUDA_ARCHITECTURES:-${CUDAARCHS:-}}}"; + local n_archs=1; + + case "${archs:-}" in + native | NATIVE) + # should technically be the number of unique GPU archs + # in the system, but this should be good enough for most + n_archs=1; + ;; + all | all-major) + # Max out at 6 threads + n_archs=6; + ;; + ALL | RAPIDS) + # currently: 60-real;70-real;75-real;80-real;86-real;90 + # see: https://github.com/rapidsai/rapids-cmake/blob/branch-23.08/rapids-cmake/cuda/set_architectures.cmake#L54 + n_archs=6; + ;; + *) + # Otherwise if explicitly defined, count the number of archs in the list + _split() { + IFS=';' read -ra ARCHS <<< "$1"; echo -n "${ARCHS[@]}"; + } + archs=($(_split "${archs}")); + n_archs=${#archs[@]}; + ;; + esac + + # Clamp between 1 and 6 threads per nvcc job + n_archs=$(( n_archs < 1 ? 1 : n_archs > 6 ? 6 : n_archs )); + + jobs=$((jobs / 2 * 3 / n_archs + 1)); + + echo "n_jobs=${jobs}"; + echo "n_arch=${n_archs}"; + + # echo "PARALLEL_LEVEL=${PARALLEL_LEVEL}"; + # echo "NVCC_APPEND_FLAGS=--threads=${n_archs}"; +} + +(jobs_and_archs "$@"); diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/parse-cmake-var-from-args.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/parse-cmake-var-from-args.sh index 64c35010..f11c7e68 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/parse-cmake-var-from-args.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/parse-cmake-var-from-args.sh @@ -3,10 +3,12 @@ parse_cmake_var_from_args() { set -euo pipefail; - echo "$( \ - export $(rapids-parse-cmake-vars-from-args "${@:2}"); \ - echo "\$$1" | envsubst "\$$1"; \ + eval "$( \ + rapids-parse-cmake-vars-from-args "${@:2}" \ + | xargs -r -d'\n' -I% echo -n local %\; \ )"; + + echo "\$$1" | envsubst "\$$1"; } (parse_cmake_var_from_args "$@"); diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp-build.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp-build.tmpl.sh index 1f419125..3372efbe 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp-build.tmpl.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/cpp-build.tmpl.sh @@ -6,9 +6,17 @@ build_${CPP_LIB}_cpp() { configure-${CPP_LIB}-cpp "$@"; - cmake --build ~/${CPP_SRC}/build/latest \ - -j${PARALLEL_LEVEL:-$(nproc --ignore=2)} \ - ; + eval "$( \ + rapids-get-jobs-and-archs "$@" \ + | xargs -r -d'\n' -I% echo -n local %\; \ + )"; + + JOBS=${n_jobs} \ + PARALLEL_LEVEL=${n_jobs} \ + NVCC_APPEND_FLAGS="--threads=${n_arch} ${NVCC_APPEND_FLAGS:-}" \ + cmake --build ~/${CPP_SRC}/build/latest \ + --parallel ${n_jobs} \ + --verbose; } (build_${CPP_LIB}_cpp "$@"); diff --git a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python-build.tmpl.sh b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python-build.tmpl.sh index 45974e18..58faa3b2 100755 --- a/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python-build.tmpl.sh +++ b/features/src/rapids-build-utils/opt/rapids-build-utils/bin/tmpl/python-build.tmpl.sh @@ -14,6 +14,11 @@ build_${PY_LIB}_python() { build-${CPP_LIB}-cpp "$@"; fi + eval "$( \ + rapids-get-jobs-and-archs "$@" \ + | xargs -r -d'\n' -I% echo -n local %\; \ + )"; + # Define both lowercase and uppercase # `-DFIND__CPP=ON` and `-DFIND__CPP=ON` because the RAPIDS # scikit-build CMakeLists.txt's aren't 100% consistent in the casing @@ -33,7 +38,7 @@ build_${PY_LIB}_python() { local ninja_args=(); ninja_args+=("-v"); - ninja_args+=("-j${PARALLEL_LEVEL:-$(nproc --ignore=2)}"); + ninja_args+=("-j${n_jobs}"); local pip_args=(); pip_args+=("-vv"); @@ -45,10 +50,13 @@ build_${PY_LIB}_python() { trap "rm -rf ~/${PY_SRC}/$(echo "${PY_LIB}" | tr '-' '_').egg-info" EXIT; + JOBS=${n_jobs} \ + PARALLEL_LEVEL=${n_jobs} \ CMAKE_GENERATOR="Ninja" \ SKBUILD_BUILD_OPTIONS="${ninja_args[@]}" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ CMAKE_ARGS="$(rapids-parse-cmake-args ${cmake_args[@]})" \ + NVCC_APPEND_FLAGS="--threads=${n_arch} ${NVCC_APPEND_FLAGS:-}" \ python -m pip install ${pip_args[@]} \ ; }