From 3d7be7483ba4f55053e28de2ec4e5b12473d3490 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Fri, 10 May 2024 14:00:10 -0500 Subject: [PATCH] use libucx wheels (#1041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Contributes to https://github.com/rapidsai/build-planning/issues/57. Similar to https://github.com/rapidsai/ucxx/pull/226, proposes using the new UCX wheels from https://github.com/rapidsai/ucx-wheels, instead of vendoring system versions of `libuc{m,p,s,t}.so`. ## Benefits of these changes Allows users of `ucx-py` to avoid needing system installations of the UCX libraries. Shrinks the `ucx-py` wheels by 6.7MB compressed (77%) and 19.1 MB uncompressed (73%).
how I calculated that (click me) Mounting in a directory with a wheel built from this branch... ```shell docker run \ --rm \ -v $(pwd)/final_dist:/opt/work \ -it python:3.10 \ bash pip install pydistcheck pydistcheck --inspect /opt/work/*.whl ``` ```text ----- package inspection summary ----- file size * compressed size: 2.0M * uncompressed size: 7.0M * compression space saving: 71.3% contents * directories: 10 * files: 38 (2 compiled) size by extension * .so - 6.9M (97.7%) * .py - 0.1M (2.0%) * .pyx - 9.3K (0.1%) * no-extension - 7.1K (0.1%) * .pyi - 3.9K (0.1%) * .c - 1.7K (0.0%) * .txt - 39.0B (0.0%) largest files * (5.3M) ucp/_libs/ucx_api.cpython-310-x86_64-linux-gnu.so * (1.6M) ucp/_libs/arr.cpython-310-x86_64-linux-gnu.so * (36.3K) ucp/core.py * (20.3K) ucp/benchmarks/cudf_merge.py * (12.1K) ucp/benchmarks/send_recv.py ``` Compared to a recent nightly release. ```shell pip download \ -d /tmp/delete-me \ --prefer-binary \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ 'ucx-py-cu12>=0.38.0a' pydistcheck --inspect /tmp/delete-me/*.whl ``` ```text ----- package inspection summary ----- file size * compressed size: 8.7M * uncompressed size: 26.1M * compression space saving: 66.8% contents * directories: 11 * files: 65 (21 compiled) size by extension * .0 - 14.4M (55.4%) * .so - 8.4M (32.2%) * .a - 1.8M (6.7%) * .140 - 0.7M (2.5%) * .12 - 0.7M (2.5%) * .py - 0.1M (0.5%) * .pyx - 9.3K (0.0%) * no-extension - 7.3K (0.0%) * .la - 4.2K (0.0%) * .pyi - 3.9K (0.0%) * .c - 1.7K (0.0%) * .txt - 39.0B (0.0%) largest files * (8.7M) ucx_py_cu12.libs/libucp-5720f0c9.so.0.0.0 * (5.3M) ucp/_libs/ucx_api.cpython-310-x86_64-linux-gnu.so * (2.0M) ucx_py_cu12.libs/libucs-3c3009f0.so.0.0.0 * (1.6M) ucp/_libs/arr.cpython-310-x86_64-linux-gnu.so * (1.5M) ucx_py_cu12.libs/libuct-2a15b69b.so.0.0.0 ```
## Notes for Reviewers Left some comments on the diff describing specific design choices. ### The libraries from the `libucx` wheel are only used if a system installation isn't available Built a wheel in a container using the same image used here in CI. ```shell docker run \ --rm \ --gpus 1 \ --env-file "${HOME}/.aws/creds.env" \ --env CI=true \ -v $(pwd):/opt/work \ -w /opt/work \ -it rapidsai/ci-wheel:cuda12.2.2-rockylinux8-py3.10 \ bash ci/build_wheel.sh ``` Found that the libraries from the `libucx` wheel are correctly found at build time, and are later found at import time.
using 'rapidsai/citestwheel' image and LD_DEBUG (click me) ```shell # run a RAPIDS wheel-testing container, mount in the directory with the built wheel docker run \ --rm \ --gpus 1 \ -v $(pwd)/final_dist:/opt/work \ -w /opt/work \ -it rapidsai/citestwheel:cuda12.2.2-ubuntu22.04-py3.10 \ bash ``` `rapidsai/citestwheel` does NOT have the UCX libraries installed at `/usr/lib*`. ```shell find /usr -name 'libucm.so*' # (empty) ``` Installed the `ucx-py` wheel. ```shell # install the wheel pip install ./*.whl # now libuc{m,p,s,t} are found in site-packages find /usr -name 'libucm.so*' # (empty) find /pyenv -name 'libucm.so*' # /pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucm.so.0.0.0 # /pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucm.so.0 # /pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucm.so # try importing ucx-py and track where 'ld' finds the ucx libraries LD_DEBUG="files,libs" LD_DEBUG_OUTPUT=out.txt \ python -c "from ucp._libs import arr" # 'ld' creates multiple files... combine them to 1 for easier searching cat out.txt.* > out-full.txt ``` In that output, saw that `ld` was finding `libucs.so` first. It searched all the system paths before finally finding it in the `libucx` wheel. 
```text 1037: file=libucs.so [0]; dynamically loaded by /pyenv/versions/3.10.14/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so [0] 1037: find library=libucs.so [0]; searching 1037: search path= (LD_LIBRARY_PATH) 1037: search path=/pyenv/versions/3.10.14/lib (RUNPATH from file /pyenv/versions/3.10.14/bin/python) 1037: trying file=/pyenv/versions/3.10.14/lib/libucs.so 1037: search cache=/etc/ld.so.cache 1037: search path=/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v3:/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v2:/lib/x86_64-linux-gnu/tls/haswell/x86_64:/lib/x86_64-linux-gnu/tls/haswell:/lib/x86_64-linux-gnu/tls/x86_64:/lib/x86_64-linux-gnu/tls:/lib/x86_64-linux-gnu/haswell/x86_64:/lib/x86_64-linux-gnu/haswell:/lib/x86_64-linux-gnu/x86_64:/lib/x86_64-linux-gnu:/usr/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v3:/usr/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v2:/usr/lib/x86_64-linux-gnu/tls/haswell/x86_64:/usr/lib/x86_64-linux-gnu/tls/haswell:/usr/lib/x86_64-linux-gnu/tls/x86_64:/usr/lib/x86_64-linux-gnu/tls:/usr/lib/x86_64-linux-gnu/haswell/x86_64:/usr/lib/x86_64-linux-gnu/haswell:/usr/lib/x86_64-linux-gnu/x86_64:/usr/lib/x86_64-linux-gnu:/lib/glibc-hwcaps/x86-64-v3:/lib/glibc-hwcaps/x86-64-v2:/lib/tls/haswell/x86_64:/lib/tls/haswell:/lib/tls/x86_64:/lib/tls:/lib/haswell/x86_64:/lib/haswell:/lib/x86_64:/lib:/usr/lib/glibc-hwcaps/x86-64-v3:/usr/lib/glibc-hwcaps/x86-64-v2:/usr/lib/tls/haswell/x86_64:/usr/lib/tls/haswell:/usr/lib/tls/x86_64:/usr/lib/tls:/usr/lib/haswell/x86_64:/usr/lib/haswell:/usr/lib/x86_64:/usr/lib (system search path) 1037: trying file=/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v3/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v2/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/tls/haswell/x86_64/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/tls/haswell/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/tls/x86_64/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/tls/libucs.so 1037: trying 
file=/lib/x86_64-linux-gnu/haswell/x86_64/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/haswell/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/x86_64/libucs.so 1037: trying file=/lib/x86_64-linux-gnu/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v3/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/glibc-hwcaps/x86-64-v2/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/tls/haswell/x86_64/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/tls/haswell/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/tls/x86_64/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/tls/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/haswell/x86_64/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/haswell/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/x86_64/libucs.so 1037: trying file=/usr/lib/x86_64-linux-gnu/libucs.so 1037: trying file=/lib/glibc-hwcaps/x86-64-v3/libucs.so 1037: trying file=/lib/glibc-hwcaps/x86-64-v2/libucs.so 1037: trying file=/lib/tls/haswell/x86_64/libucs.so 1037: trying file=/lib/tls/haswell/libucs.so 1037: trying file=/lib/tls/x86_64/libucs.so 1037: trying file=/lib/tls/libucs.so 1037: trying file=/lib/haswell/x86_64/libucs.so 1037: trying file=/lib/haswell/libucs.so 1037: trying file=/lib/x86_64/libucs.so 1037: trying file=/lib/libucs.so 1037: trying file=/usr/lib/glibc-hwcaps/x86-64-v3/libucs.so 1037: trying file=/usr/lib/glibc-hwcaps/x86-64-v2/libucs.so 1037: trying file=/usr/lib/tls/haswell/x86_64/libucs.so 1037: trying file=/usr/lib/tls/haswell/libucs.so 1037: trying file=/usr/lib/tls/x86_64/libucs.so 1037: trying file=/usr/lib/tls/libucs.so 1037: trying file=/usr/lib/haswell/x86_64/libucs.so 1037: trying file=/usr/lib/haswell/libucs.so 1037: trying file=/usr/lib/x86_64/libucs.so 1037: trying file=/usr/lib/libucs.so 1037: 1037: file=/pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucs.so [0]; dynamically loaded by 
/pyenv/versions/3.10.14/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so [0] 1037: file=/pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucs.so [0]; generating link map 1037: dynamic: 0x00007f4ce42d7c80 base: 0x00007f4ce427e000 size: 0x000000000006fda0 1037: entry: 0x00007f4ce4290ce0 phdr: 0x00007f4ce427e040 phnum: 1 ``` Then the others were found via the RPATH entries on `libucs.so`. `libucm.so.0`: ```text 196: file=libucm.so.0 [0]; needed by /pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucs.so [0] 196: find library=libucm.so.0 [0]; searching 196: search path=...redacted...:/pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib (RPATH from file /pyenv/versions/3.10.14/lib/python3.10/site-packages/libucx/lib/libucs.so) ... ```
However, the libraries from the `libucx` wheel appear to be **the last place `ld` searches**. That means that if you use these wheels on a system with a system installation of `libuc{m,p,s,t}`, that system installation's libraries will be loaded instead.
using 'rapidsai/ci-wheel' image and LD_DEBUG (click me) ```shell docker run \ --rm \ --gpus 1 \ -v $(pwd)/final_dist:/opt/work \ -w /opt/work \ -it rapidsai/ci-wheel:cuda12.2.2-rockylinux8-py3.10 \ bash ``` `rapidsai/ci-wheel` has the UCX libraries installed at `/usr/lib64`. ```shell find /usr/ -name 'libucm.so*' # /usr/lib64/libucm.so.0.0.0 # /usr/lib64/libucm.so.0 # /usr/lib64/libucm.so ``` Installed a wheel and tried to import from it. ```shell pip install ./*.whl LD_DEBUG="files,libs" LD_DEBUG_OUTPUT=out.txt \ python -c "from ucp._libs import arr" cat out.txt.* > out-full.txt ``` In that situation, I saw the system libraries found before the one from the wheel. ```text 226: file=libucs.so [0]; dynamically loaded by /pyenv/versions/3.10.14/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so [0] 226: find library=libucs.so [0]; searching 226: search path=/pyenv/versions/3.10.14/lib (RPATH from file /pyenv/versions/3.10.14/bin/python) 226: trying file=/pyenv/versions/3.10.14/lib/libucs.so 226: search path=/pyenv/versions/3.10.14/lib (RPATH from file /pyenv/versions/3.10.14/bin/python) 226: trying file=/pyenv/versions/3.10.14/lib/libucs.so 226: search path=/opt/rh/gcc-toolset-11/root/usr/lib64/tls:/opt/rh/gcc-toolset-11/root/usr/lib64:/opt/rh/gcc-toolset-11/root/usr/lib (LD_LIBRARY_PATH) 226: trying file=/opt/rh/gcc-toolset-11/root/usr/lib64/tls/libucs.so 226: trying file=/opt/rh/gcc-toolset-11/root/usr/lib64/libucs.so 226: trying file=/opt/rh/gcc-toolset-11/root/usr/lib/libucs.so 226: search cache=/etc/ld.so.cache 226: trying file=/usr/lib64/libucs.so ``` In this case, when the system libraries are available, `site-packages/libucx/lib` isn't even searched.
To avoid any RAPIDS-specific stuff tricking me, I tried in a generic `python:3.10` image. Found that the library could be loaded and all the `libuc{m,p,s,t}` libraries from the `libucx` wheel are found 🎉 .
using 'python:3.10' image (click me) ```shell docker run \ --rm \ --gpus 1 \ -v $(pwd)/final_dist:/opt/work \ -w /opt/work \ -it python:3.10 \ bash pip install \ --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple \ ./*.whl LD_DEBUG="files,libs" LD_DEBUG_OUTPUT=out.txt \ python -c "from ucp._libs import arr" ``` 💥 ```text 16: opening file=/usr/local/lib/python3.10/site-packages/libucx/lib/libucm.so.0 [0]; direct_opencount=1 16: 16: opening file=/usr/local/lib/python3.10/site-packages/libucx/lib/libucs.so [0]; direct_opencount=1 ```
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Peter Andreas Entschev (https://github.com/pentschev) - Vyas Ramasubramani (https://github.com/vyasr) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/ucx-py/pull/1041 --- .gitignore | 3 +- .readthedocs.yml | 8 +-- ci/build_docs_pre_install.sh | 17 +++++ ci/build_wheel.sh | 105 +++++++------------------------ conda/environments/builddocs.yml | 4 +- conda/recipes/ucx-py/meta.yaml | 6 +- dependencies.yaml | 47 ++++++++++++++ pyproject.toml | 2 + setup.py | 49 +++++++++++++++ ucp/__init__.py | 12 ++++ 10 files changed, 161 insertions(+), 92 deletions(-) create mode 100755 ci/build_docs_pre_install.sh diff --git a/.gitignore b/.gitignore index a155793c4..b0898274a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,8 +10,9 @@ dask-worker-space __pytestcache__ __pycache__ *.egg-info/ +final_dist/ dist/ .vscode *.sw[po] - +*.whl diff --git a/.readthedocs.yml b/.readthedocs.yml index 937f721db..f672b74d8 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,11 +4,9 @@ build: os: "ubuntu-22.04" tools: python: "mambaforge-22.9" - -python: - install: - - method: pip - path: . + jobs: + post_checkout: + - bash ci/build_docs_pre_install.sh conda: environment: conda/environments/builddocs.yml diff --git a/ci/build_docs_pre_install.sh b/ci/build_docs_pre_install.sh new file mode 100755 index 000000000..95a38f724 --- /dev/null +++ b/ci/build_docs_pre_install.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# Copyright (c) 2024 NVIDIA CORPORATION. +# +# [description] +# +# ucx-py's docs builds require installing the library. +# +# It does that by running 'pip install .' from the root of the repo. This script +# is used to modify readthedocs' local checkout of this project's source code prior +# to that 'pip install' being run. 
+# +# For more, see https://docs.readthedocs.io/en/stable/build-customization.html +# + +set -euo pipefail + +sed -r -i "s/\"libucx/\"libucx-cu12/g" ./pyproject.toml diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 9cc871e9a..5417f3090 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -6,6 +6,19 @@ set -euo pipefail package_name="ucx-py" underscore_package_name=$(echo "${package_name}" | tr "-" "_") +# Clear out system ucx files to ensure that we're getting ucx from the wheel. +rm -rf /usr/lib64/ucx +rm -rf /usr/lib64/libucm.* +rm -rf /usr/lib64/libucp.* +rm -rf /usr/lib64/libucs.* +rm -rf /usr/lib64/libucs_signal.* +rm -rf /usr/lib64/libuct.* + +rm -rf /usr/include/ucm +rm -rf /usr/include/ucp +rm -rf /usr/include/ucs +rm -rf /usr/include/uct + source rapids-configure-sccache source rapids-date-string @@ -35,96 +48,22 @@ if ! rapids-is-release-build; then fi sed -r -i "s/cudf==(.*)\"/cudf${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} +sed -r -i "/\"libucx([=><]+)/ s/\"libucx/\"libucx${PACKAGE_CUDA_SUFFIX}/g" ${pyproject_file} if [[ $PACKAGE_CUDA_SUFFIX == "-cu12" ]]; then sed -i "s/cupy-cuda11x/cupy-cuda12x/g" ${pyproject_file} fi - python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check mkdir -p final_dist -python -m auditwheel repair -w final_dist dist/* - -# Auditwheel rewrites dynamic libraries that are referenced at link time in the -# package. However, UCX loads a number of sub-libraries at runtime via dlopen; -# these are not picked up by auditwheel. Since we have a priori knowledge of -# what these libraries are, we mimic the behaviour of auditwheel by using the -# same hash-based uniqueness scheme and rewriting the link paths. 
- -WHL=$(realpath final_dist/${underscore_package_name}*manylinux*.whl) - -# first grab the auditwheel hashes for libuc{tms} -LIBUCM=$(unzip -l $WHL | awk 'match($4, /libucm-[^\.]+\./) { print substr($4, RSTART) }') -LIBUCT=$(unzip -l $WHL | awk 'match($4, /libuct-[^\.]+\./) { print substr($4, RSTART) }') -LIBUCS=$(unzip -l $WHL | awk 'match($4, /libucs-[^\.]+\./) { print substr($4, RSTART) }') - -# Extract the libraries that have already been patched in by auditwheel -mkdir -p repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx -unzip $WHL "${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/*.so*" -d repair_dist/ - -# Patch the RPATH to include ORIGIN for each library -pushd repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs -for f in libu*.so* -do - if [[ -f $f ]]; then - patchelf --add-rpath '$ORIGIN' $f - fi -done - -popd - -# Now copy in all the extra libraries that are only ever loaded at runtime -pushd repair_dist/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/ucx -if [[ -d /usr/lib64/ucx ]]; then - cp -P /usr/lib64/ucx/* . -elif [[ -d /usr/lib/ucx ]]; then - cp -P /usr/lib/ucx/* . -else - echo "Could not find ucx libraries" - exit 1 -fi - -# we link against /lib/site-packages/${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.lib/libuc{ptsm} -# we also amend the rpath to search one directory above to *find* libuc{tsm} -for f in libu*.so* -do - # Avoid patching symlinks, which is redundant - if [[ ! -L $f ]]; then - patchelf --replace-needed libuct.so.0 $LIBUCT $f - patchelf --replace-needed libucs.so.0 $LIBUCS $f - patchelf --replace-needed libucm.so.0 $LIBUCM $f - patchelf --add-rpath '$ORIGIN/..' $f - fi -done - -# Bring in cudart as well. To avoid symbol collision with other libraries e.g. -# cupy we mimic auditwheel by renaming the libraries to include the hashes of -# their names. 
Since there will typically be a chain of symlinks -# libcudart.so->libcudart.so.X->libcudart.so.X.Y.Z we need to follow the chain -# and rename all of them. - -find /usr/local/cuda/ -name "libcudart*.so*" | xargs cp -P -t . -src=libcudart.so -hash=$(sha256sum ${src} | awk '{print substr($1, 0, 8)}') -target=$(basename $(readlink -f ${src})) - -mv ${target} ${target/libcudart/libcudart-${hash}} -while readlink ${src} > /dev/null; do - target=$(readlink ${src}) - ln -s ${target/libcudart/libcudart-${hash}} ${src/libcudart/libcudart-${hash}} - rm -f ${src} - src=${target} -done - -to_rewrite=$(ldd libuct_cuda.so | awk '/libcudart/ { print $1 }') -patchelf --replace-needed ${to_rewrite} libcudart-${hash}.so libuct_cuda.so -patchelf --add-rpath '$ORIGIN' libuct_cuda.so - -popd - -pushd repair_dist -zip -r $WHL ${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}.libs/ -popd +python -m auditwheel repair \ + -w final_dist \ + --exclude "libucm.so.0" \ + --exclude "libucp.so.0" \ + --exclude "libucs.so.0" \ + --exclude "libucs_signal.so.0" \ + --exclude "libuct.so.0" \ + dist/* RAPIDS_PY_WHEEL_NAME="${underscore_package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 final_dist diff --git a/conda/environments/builddocs.yml b/conda/environments/builddocs.yml index 392ac63c0..cd9766abd 100644 --- a/conda/environments/builddocs.yml +++ b/conda/environments/builddocs.yml @@ -14,5 +14,7 @@ dependencies: - recommonmark - pandoc=<2.0.0 - pip -- ucx +- pip: + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + - ../../ - cython diff --git a/conda/recipes/ucx-py/meta.yaml b/conda/recipes/ucx-py/meta.yaml index f0a81f7f1..1e6d6ab4c 100644 --- a/conda/recipes/ucx-py/meta.yaml +++ b/conda/recipes/ucx-py/meta.yaml @@ -30,13 +30,15 @@ requirements: - python - pip - ucx - {% for r in data.get("build-system", {}).get("requires", []) %} + # 'libucx' wheel dependency is unnecessary... 
the 'ucx' conda-forge package is used here instead + {% for r in data.get("build-system", {}).get("requires", []) if not r.startswith("libucx") %} - {{ r }} {% endfor %} run: - python - ucx >=1.15.0,<1.16.0 - {% for r in data.get("project", {}).get("dependencies", []) %} + # 'libucx' wheel dependency is unnecessary... the 'ucx' conda-forge package is used here instead + {% for r in data.get("project", {}).get("dependencies", []) if not r.startswith("libucx") %} - {{ r }} {% endfor %} diff --git a/dependencies.yaml b/dependencies.yaml index 0a7524467..9b7e3b562 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -27,6 +27,7 @@ files: table: build-system includes: - build_python + - depends_on_ucx_build py_run: output: pyproject pyproject_dir: . @@ -34,6 +35,7 @@ files: table: project includes: - run + - depends_on_ucx_run py_optional_test: output: pyproject pyproject_dir: . @@ -115,9 +117,54 @@ dependencies: packages: - numpy>=1.23,<2.0a0 - pynvml>=11.4.1 + depends_on_ucx_build: + common: + - output_types: conda + packages: + - ucx==1.15.0 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - libucx-cu12==1.15.0 + - matrix: {cuda: "11.*"} + packages: + - libucx-cu11==1.15.0 + # NOTE: this fallback needs to be a real, suffixed version + # so 'pip install .' (e.g. 
as used in docs builds) will work + - matrix: null + packages: + - libucx==1.15.0 + depends_on_ucx_run: + common: - output_types: conda packages: - ucx>=1.15.0,<1.16 + - output_types: requirements + packages: + # pip recognizes the index as a global option for the requirements.txt file + - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple + specific: + - output_types: [requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - libucx-cu12>=1.15.0,<1.16 + - matrix: {cuda: "11.*"} + packages: + - libucx-cu11>=1.15.0,<1.16 + # NOTE: this fallback needs to be a real, suffixed version + # so "pip install ." (e.g. as used in docs builds) will work + - matrix: null + packages: + - libucx>=1.15.0,<1.16 test_python: common: - output_types: [conda, requirements, pyproject] diff --git a/pyproject.toml b/pyproject.toml index 2c932784d..0c3939f35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ build-backend = "setuptools.build_meta" requires = [ "cython>=3.0.0", + "libucx==1.15.0", "setuptools>=64.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. @@ -30,6 +31,7 @@ authors = [ license = { text = "BSD-3-Clause" } requires-python = ">=3.9" dependencies = [ + "libucx>=1.15.0,<1.16", "numpy>=1.23,<2.0a0", "pynvml>=11.4.1", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit dependencies.yaml and run `rapids-dependency-file-generator`. 
diff --git a/setup.py b/setup.py index 9a52b30c1..2c238c3ca 100644 --- a/setup.py +++ b/setup.py @@ -5,6 +5,7 @@ from __future__ import absolute_import, print_function +import glob import os from distutils.sysconfig import get_config_var, get_python_inc @@ -12,11 +13,59 @@ from setuptools import setup from setuptools.extension import Extension + +def _find_libucx_libs_and_headers(): + """ + If the 'libucx' wheel is not installed, returns a tuple of empty lists. + In that case, the project will be compiled against system installations + of the UCX libraries. + + If 'libucx' is installed, returns lists of library and header paths to help + the compiler and linker find its contents. In that case, the project will + be compiled against those libucx-wheel-provided versions of the UCX libraries. + """ + try: + import libucx + except ImportError: + return [], [] + + # find 'libucx' + module_dir = os.path.dirname(libucx.__file__) + + # find where it stores files like 'libucm.so.0' + libs = glob.glob(f"{module_dir}/**/lib*.so*", recursive=True) + + # deduplicate those library paths + lib_dirs = {os.path.dirname(f) for f in libs} + if not lib_dirs: + raise RuntimeError( + f"Did not find shared libraries in 'libucx' install location ({module_dir})" + ) + + # find where it stores headers + headers = glob.glob(f"{module_dir}/**/include", recursive=True) + + # deduplicate those header paths (and ensure the list only includes directories) + header_dirs = {f for f in headers if os.path.isdir(f)} + if not header_dirs: + raise RuntimeError( + f"Did not find UCX headers 'libucx' install location ({module_dir})" + ) + + return list(lib_dirs), list(header_dirs) + + include_dirs = [os.path.dirname(get_python_inc())] library_dirs = [get_config_var("LIBDIR")] libraries = ["ucp", "uct", "ucm", "ucs"] extra_compile_args = ["-std=c99", "-Werror"] +# tell the compiler and linker where to find UCX libraries and their headers +# provided by the 'libucx' wheel +libucx_lib_dirs, 
libucx_header_dirs = _find_libucx_libs_and_headers() +library_dirs.extend(libucx_lib_dirs) +include_dirs.extend(libucx_header_dirs) + ext_modules = [ Extension( diff --git a/ucp/__init__.py b/ucp/__init__.py index 791860b1d..390fbf459 100644 --- a/ucp/__init__.py +++ b/ucp/__init__.py @@ -16,6 +16,18 @@ logger.debug("Setting env UCX_MEMTYPE_CACHE=n, which is required by UCX") os.environ["UCX_MEMTYPE_CACHE"] = "n" + +# If libucx was installed as a wheel, we must request it to load the library symbols. +# Otherwise, we assume that the library was installed in a system path that ld can find. +try: + import libucx +except ImportError: + pass +else: + libucx.load_library() + del libucx + + from .core import * # noqa from .core import get_ucx_version # noqa from .utils import get_ucxpy_logger # noqa