From 366f0e2f095c2b4719fa577084300a016560b3e6 Mon Sep 17 00:00:00 2001 From: NvTimLiu <50287591+NvTimLiu@users.noreply.github.com> Date: Sat, 7 Nov 2020 05:02:18 +0800 Subject: [PATCH] Get cudf/spark dependency from the correct .m2 dir (#1062) * Get cudf/spark dependency from the correct .m2 dir 'WORKSPACE' & 'M2DIR' vars are needed for shims to gen the correct cudf/spark dependency info in shims. Below error in 'spark*-info.properties' is due to unset of 'WORKSPACE' & 'M2DIR': build/dependency-info.sh: line 30: /jenkins/printJarVersion.sh: No such file or directory build/dependency-info.sh: line 33: /jenkins/printJarVersion.sh: No such file or directory To fix the error, we set the default values for them in 'build/dependency-info.sh': 'M2DIR=$HOME/.m2/repository' 'WORKSPACE=../..' We also need to explicitly set the correct 'M2DIR' path, in case we change it by '-Dmaven.repo.local=$M2DIR'. Already updated Jenkins scripts to set the correct 'M2DIR'. Signed-off-by: Tim Liu * let mvn package fails in case the script 'build/dependency-info.sh' runs failure * Stop mvn build if `build/build-info` fails Signed-off-by: Tim Liu * Copyright 2020 Signed-off-by: Tim Liu --- build/build-info | 3 ++- build/dependency-info.sh | 8 ++++++-- jenkins/databricks/build.sh | 3 ++- jenkins/printJarVersion.sh | 1 + jenkins/spark-nightly-build.sh | 14 ++++++++------ pom.xml | 4 ++-- shims/spark300/pom.xml | 2 +- shims/spark300emr/pom.xml | 2 +- shims/spark301/pom.xml | 2 +- shims/spark301db/pom.xml | 2 +- shims/spark301emr/pom.xml | 2 +- shims/spark302/pom.xml | 2 +- shims/spark310/pom.xml | 2 +- 13 files changed, 28 insertions(+), 19 deletions(-) diff --git a/build/build-info b/build/build-info index a905661920b..88cd8516d13 100755 --- a/build/build-info +++ b/build/build-info @@ -1,7 +1,7 @@ #!/usr/bin/env bash # -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ # This script generates the build info. # Arguments: # rapids4spark_version - The current version of spark plugin +set -e echo_build_properties() { echo version=$1 diff --git a/build/dependency-info.sh b/build/dependency-info.sh index 7633c79c20f..e26b748ddc0 100755 --- a/build/dependency-info.sh +++ b/build/dependency-info.sh @@ -23,14 +23,18 @@ # SPARK_VER - The version of spark # Parse cudf and spark dependency versions +set -e CUDF_VER=$1 CUDA_CLASSIFIER=$2 SERVER_ID=snapshots -${WORKSPACE}/jenkins/printJarVersion.sh "cudf_version" "${HOME}/.m2/repository/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" "-${CUDA_CLASSIFIER}.jar" $SERVER_ID +# set defualt values for 'M2DIR' & 'WORKSPACE' so that shims can get the correct cudf/spark dependnecy +M2DIR=${M2DIR:-"$HOME/.m2/repository"} +WORKSPACE=${WORKSPACE:-"../.."} +${WORKSPACE}/jenkins/printJarVersion.sh "cudf_version" "${M2DIR}/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" "-${CUDA_CLASSIFIER}.jar" $SERVER_ID SPARK_VER=$3 -SPARK_SQL_VER=`${WORKSPACE}/jenkins/printJarVersion.sh "spark_version" "${HOME}/.m2/repository/org/apache/spark/spark-sql_2.12/${SPARK_VER}" "spark-sql_2.12-${SPARK_VER}" ".jar" $SERVER_ID` +SPARK_SQL_VER=`${WORKSPACE}/jenkins/printJarVersion.sh "spark_version" "${M2DIR}/org/apache/spark/spark-sql_2.12/${SPARK_VER}" "spark-sql_2.12-${SPARK_VER}" ".jar" $SERVER_ID` # Split spark version from spark-sql_2.12 jar filename echo ${SPARK_SQL_VER/"-sql_2.12"/} diff --git a/jenkins/databricks/build.sh b/jenkins/databricks/build.sh index 081461facb1..f98044994d4 100755 --- a/jenkins/databricks/build.sh +++ b/jenkins/databricks/build.sh @@ -49,7 +49,8 @@ RAPIDS_BUILT_JAR=rapids-4-spark_$SCALA_VERSION-$SPARK_PLUGIN_JAR_VERSION.jar echo "Scala version is: $SCALA_VERSION" mvn -B -P${BUILD_PROFILES} clean package -DskipTests || true 
-M2DIR=/home/ubuntu/.m2/repository +# export 'M2DIR' so that shims can get the correct cudf/spark dependnecy info +export M2DIR=/home/ubuntu/.m2/repository CUDF_JAR=${M2DIR}/ai/rapids/cudf/${CUDF_VERSION}/cudf-${CUDF_VERSION}-${CUDA_VERSION}.jar # pull normal Spark artifacts and ignore errors then install databricks jars, then build again diff --git a/jenkins/printJarVersion.sh b/jenkins/printJarVersion.sh index fde53b4e54d..bc509938868 100755 --- a/jenkins/printJarVersion.sh +++ b/jenkins/printJarVersion.sh @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # +set -e function print_ver(){ TAG=$1 diff --git a/jenkins/spark-nightly-build.sh b/jenkins/spark-nightly-build.sh index 6f675d09096..265075fb908 100755 --- a/jenkins/spark-nightly-build.sh +++ b/jenkins/spark-nightly-build.sh @@ -19,12 +19,14 @@ set -ex . jenkins/version-def.sh -mvn -U -B -Pinclude-databricks,snapshot-shims clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 +# export 'M2DIR' so that shims can get the correct cudf/spark dependnecy info +export M2DIR="$WORKSPACE/.m2" +mvn -U -B -Pinclude-databricks,snapshot-shims clean deploy $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR # Run unit tests against other spark versions -mvn -U -B -Pspark301tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 -mvn -U -B -Pspark302tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 -mvn -U -B -Pspark310tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$WORKSPACE/.m2 +mvn -U -B -Pspark301tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR +mvn -U -B -Pspark302tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR +mvn -U -B -Pspark310tests,snapshot-shims test $MVN_URM_MIRROR -Dmaven.repo.local=$M2DIR # Parse cudf and spark files from local mvn repo -jenkins/printJarVersion.sh "CUDFVersion" "${WORKSPACE}/.m2/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" 
"-${CUDA_CLASSIFIER}.jar" $SERVER_ID -jenkins/printJarVersion.sh "SPARKVersion" "${WORKSPACE}/.m2/org/apache/spark/spark-core_2.12/${SPARK_VER}" "spark-core_2.12-${SPARK_VER}" ".jar" $SERVER_ID +jenkins/printJarVersion.sh "CUDFVersion" "$M2DIR/ai/rapids/cudf/${CUDF_VER}" "cudf-${CUDF_VER}" "-${CUDA_CLASSIFIER}.jar" $SERVER_ID +jenkins/printJarVersion.sh "SPARKVersion" "$M2DIR/org/apache/spark/spark-core_2.12/${SPARK_VER}" "spark-core_2.12-${SPARK_VER}" ".jar" $SERVER_ID diff --git a/pom.xml b/pom.xml index 2fac2a74ca2..30516e6d078 100644 --- a/pom.xml +++ b/pom.xml @@ -536,8 +536,8 @@ - - + + diff --git a/shims/spark300/pom.xml b/shims/spark300/pom.xml index f74cdc05a20..9fa09ec4c91 100644 --- a/shims/spark300/pom.xml +++ b/shims/spark300/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark300emr/pom.xml b/shims/spark300emr/pom.xml index fa3cc7ea2d1..f71fa4c3850 100644 --- a/shims/spark300emr/pom.xml +++ b/shims/spark300emr/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark301/pom.xml b/shims/spark301/pom.xml index 1cc032aaa8b..2a12177658f 100644 --- a/shims/spark301/pom.xml +++ b/shims/spark301/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark301db/pom.xml b/shims/spark301db/pom.xml index 0617393371f..3d77478d7e2 100644 --- a/shims/spark301db/pom.xml +++ b/shims/spark301db/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark301emr/pom.xml b/shims/spark301emr/pom.xml index 861a343adbd..07aca6fd589 100644 --- a/shims/spark301emr/pom.xml +++ b/shims/spark301emr/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark302/pom.xml b/shims/spark302/pom.xml index 13490ee73e0..b83fda2fb2e 100644 --- a/shims/spark302/pom.xml +++ b/shims/spark302/pom.xml @@ -44,7 +44,7 @@ - + diff --git a/shims/spark310/pom.xml b/shims/spark310/pom.xml index a9ee5d66ba1..cc66c0d5261 100644 --- a/shims/spark310/pom.xml +++ b/shims/spark310/pom.xml @@ -44,7 +44,7 @@ - +