Skip to content

Commit

Permalink
UCX 1.16.0 upgrade (NVIDIA#10190)
Browse files Browse the repository at this point in the history
* Test with jucx 1.16.0

Signed-off-by: Alessandro Bellina <abellina@nvidia.com>

* Change UCX versions in dockerfiles to 1.16.0-rc4 as a test

* Fix scala2.13

* GitHub release downloads are keyed to rc1, not rc4

* Add print statements in spark-premerge-build temporarily to debug issue

* Revert "Add print statements in spark-premerge-build temporarily to debug issue"

This reverts commit 768f56d.

* Update dockerfiles for UCX 1.16-rc1

* Update copyrights

* Fix copyrights for scala2.13

---------

Signed-off-by: Alessandro Bellina <abellina@nvidia.com>
  • Loading branch information
abellina authored Jan 26, 2024
1 parent 24001fa commit 8ab25cd
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,7 @@
# - ROCKY_VER: Rocky Linux OS version

ARG CUDA_VER=11.8.0
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
ARG UCX_CUDA_VER=11
ARG UCX_ARCH=x86_64
ARG ROCKY_VER=8
Expand All @@ -33,11 +33,11 @@ ARG UCX_VER
ARG UCX_CUDA_VER
ARG UCX_ARCH

RUN yum update -y && yum install -y wget bzip2 numactl-libs libgomp
# NOTE: libibmad is installed as a temporary workaround for a dependency that is missing from the ucx-1.16 rpm
RUN yum update -y && yum install -y wget bzip2 numactl-libs libgomp libibmad
RUN ls /usr/lib
RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \
wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
tar -xvf *.bz2 && \
rpm -i ucx-$UCX_VER*.rpm && \
rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \
rpm -i `ls ucx-[0-9]*.rpm ucx-cuda-[0-9]*.rpm` --nodeps && \
rm -rf /tmp/ucx_install
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,7 +24,7 @@
# - ROCKY_VER: Rocky Linux OS version

ARG CUDA_VER=11.8.0
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
ARG UCX_CUDA_VER=11
ARG UCX_ARCH=x86_64
ARG ROCKY_VER=8
Expand All @@ -33,11 +33,10 @@ ARG UCX_VER
ARG UCX_CUDA_VER
ARG UCX_ARCH

RUN yum update -y && yum install -y wget bzip2 rdma-core numactl-libs libgomp libibverbs librdmacm
# NOTE: libibmad is installed as a temporary workaround for a dependency that is missing from the ucx-1.16 rpm
RUN yum update -y && yum install -y wget bzip2 rdma-core numactl-libs libgomp libibverbs librdmacm libibmad
RUN mkdir /tmp/ucx_install && cd /tmp/ucx_install && \
wget https://github.com/openucx/ucx/releases/download/v$UCX_VER/ucx-$UCX_VER-centos8-mofed5-cuda$UCX_CUDA_VER-$UCX_ARCH.tar.bz2 && \
tar -xvf *.bz2 && \
rpm -i ucx-$UCX_VER*.rpm && \
rpm -i ucx-cuda-$UCX_VER*.rpm --nodeps && \
rpm -i ucx-ib-$UCX_VER-1.el8.x86_64.rpm ucx-rdmacm-$UCX_VER-1.el8.x86_64.rpm && \
rpm -i `ls ucx-[0-9]*.rpm ucx-cuda-[0-9]*.rpm ucx-ib-[0-9]*.rpm ucx-rdmacm-[0-9]*.rpm` --nodeps && \
rm -rf /tmp/ucx_install
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -25,7 +25,7 @@
#

ARG CUDA_VER=11.8.0
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
ARG UCX_CUDA_VER=11
ARG UCX_ARCH=x86_64
ARG UBUNTU_VER=20.04
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -35,7 +35,7 @@

ARG RDMA_CORE_VERSION=32.1
ARG CUDA_VER=11.8.0
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
ARG UCX_CUDA_VER=11
ARG UCX_ARCH=x86_64
ARG UBUNTU_VER=20.04
Expand Down
4 changes: 2 additions & 2 deletions jenkins/Dockerfile-blossom.multi
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -26,7 +26,7 @@

ARG CUDA_VER=11.8.0
ARG UBUNTU_VER=20.04
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
# multi-platform build with: docker buildx build --platform linux/arm64,linux/amd64 <ARGS> on either amd64 or arm64 host
# check available official arm-based docker images at https://hub.docker.com/r/nvidia/cuda/tags (OS/ARCH)
FROM --platform=$TARGETPLATFORM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER}
Expand Down
4 changes: 2 additions & 2 deletions jenkins/Dockerfile-blossom.ubuntu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2020-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -27,7 +27,7 @@

ARG CUDA_VER=11.0.3
ARG UBUNTU_VER=20.04
ARG UCX_VER=1.15.0
ARG UCX_VER=1.16.0-rc1
ARG UCX_CUDA_VER=11
FROM nvidia/cuda:${CUDA_VER}-runtime-ubuntu${UBUNTU_VER}
ARG CUDA_VER
Expand Down
7 changes: 5 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020-2023, NVIDIA CORPORATION.
Copyright (c) 2020-2024, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -653,6 +653,7 @@
<id>arm64</id>
<properties>
<jni.classifier>${cuda.version}-arm64</jni.classifier>
<ucx.version>${ucx.baseVersion}-aarch64</ucx.version>
</properties>
</profile>
<profile>
Expand Down Expand Up @@ -735,7 +736,9 @@
https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994
-->
<scala.javac.args>-Xlint:all,-serial,-path,-try,-processing|-Werror</scala.javac.args>
<ucx.version>1.15.0</ucx.version>
<ucx.baseVersion>1.16.0-rc4</ucx.baseVersion>
<!-- For x86, the UCX version is just the base version (no suffix is needed); for ARM, the "-aarch64" suffix is appended under the arm64 profile. -->
<ucx.version>${ucx.baseVersion}</ucx.version>
<rapids.compressed.artifact>true</rapids.compressed.artifact>
<rapids.default.jar.excludePattern/>
<rapids.default.jar.phase>package</rapids.default.jar.phase>
Expand Down
7 changes: 5 additions & 2 deletions scala2.13/pom.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2020-2023, NVIDIA CORPORATION.
Copyright (c) 2020-2024, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -653,6 +653,7 @@
<id>arm64</id>
<properties>
<jni.classifier>${cuda.version}-arm64</jni.classifier>
<ucx.version>${ucx.baseVersion}-aarch64</ucx.version>
</properties>
</profile>
<profile>
Expand Down Expand Up @@ -735,7 +736,9 @@
https://github.com/openjdk/jdk17/blob/4afbcaf55383ec2f5da53282a1547bac3d099e9d/src/jdk.compiler/share/classes/com/sun/tools/javac/resources/compiler.properties#L1993-L1994
-->
<scala.javac.args>-Xlint:all,-serial,-path,-try,-processing|-Werror</scala.javac.args>
<ucx.version>1.15.0</ucx.version>
<ucx.baseVersion>1.16.0-rc4</ucx.baseVersion>
<!-- For x86, the UCX version is just the base version (no suffix is needed); for ARM, the "-aarch64" suffix is appended under the arm64 profile. -->
<ucx.version>${ucx.baseVersion}</ucx.version>
<rapids.compressed.artifact>true</rapids.compressed.artifact>
<rapids.default.jar.excludePattern/>
<rapids.default.jar.phase>package</rapids.default.jar.phase>
Expand Down

0 comments on commit 8ab25cd

Please sign in to comment.