From 468ea1a20a4cdff4717d5dee874ef9a92b72ed20 Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Thu, 25 Feb 2021 18:03:58 -0800
Subject: [PATCH 1/2] Remove SNAPSHOT from test and integration_test READMEs
 (#1780)

Signed-off-by: Sameer Raheja
---
 integration_tests/README.md | 8 ++++----
 tests/README.md             | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/integration_tests/README.md b/integration_tests/README.md
index 468d405b79e..7ae2b7d11ec 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -107,7 +107,7 @@ individually, so you don't risk running unit tests along with the integration te
 http://www.scalatest.org/user_guide/using_the_scalatest_shell
 
 ```shell
-spark-shell --jars rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT-tests.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT,rapids-4-spark-integration-tests_2.12-0.4.0-SNAPSHOT-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar
+spark-shell --jars rapids-4-spark-tests_2.12-0.4.0-tests.jar,rapids-4-spark-udf-examples_2.12-0.4.0,rapids-4-spark-integration-tests_2.12-0.4.0-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar
 ```
 
 First you import the `scalatest_shell` and tell the tests where they can find the test files you
@@ -131,7 +131,7 @@ If you just want to verify the SQL replacement is working you will need to add t
 example assumes CUDA 10.1 is being used.
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar" ./runtests.py
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar" ./runtests.py
 ```
 
 You don't have to enable the plugin for this to work, the test framework will do that for you.
@@ -183,7 +183,7 @@ The TPCxBB, TPCH, TPCDS, and Mortgage tests in this framework can be enabled by
 As an example, here is the `spark-submit` command with the TPCxBB parameters on CUDA 10.1:
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT.jar" ./runtests.py --tpcxbb_format="csv" --tpcxbb_path="/path/to/tpcxbb/csv"
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" ./runtests.py --tpcxbb_format="csv" --tpcxbb_path="/path/to/tpcxbb/csv"
 ```
 
 Be aware that running these tests with read data requires at least an entire GPU, and preferable several GPUs/executors
@@ -212,7 +212,7 @@ To run cudf_udf tests, need following configuration changes:
 As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 10.1:
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar" ./runtests.py --cudf_udf
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0.jar" ./runtests.py --cudf_udf
 ```
 
 ## Writing tests

diff --git a/tests/README.md b/tests/README.md
index 9d50c0ba603..8b503d1c685 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -20,7 +20,7 @@ we typically run with the default options and only increase the scale factor dep
 dbgen -b dists.dss -s 10
 ```
 
-You can include the test jar `rapids-4-spark-integration-tests_2.12-0.4.0-SNAPSHOT.jar` with the
+You can include the test jar `rapids-4-spark-integration-tests_2.12-0.4.0.jar` with the
 Spark --jars option to get the TPCH tests. To setup for the queries you can run
 `TpchLikeSpark.setupAllCSV` for CSV formatted data or `TpchLikeSpark.setupAllParquet`
 for parquet formatted data. Both of those take the Spark session, and a path to the dbgen

From 9887a434591a776611dff8f20c18148995c14aeb Mon Sep 17 00:00:00 2001
From: pxLi
Date: Fri, 26 Feb 2021 11:02:26 +0800
Subject: [PATCH 2/2] make modified check pre-merge only (#1820)

Signed-off-by: Peixin Li
---
 dist/pom.xml                    | 48 +++++++++++++++++++++------------
 jenkins/spark-premerge-build.sh |  2 +-
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/dist/pom.xml b/dist/pom.xml
index 700d1b5ebea..f0b212aa5b8 100644
--- a/dist/pom.xml
+++ b/dist/pom.xml
@@ -179,23 +179,6 @@
                 </execution>
             </executions>
         </plugin>
-        <plugin>
-            <groupId>org.codehaus.mojo</groupId>
-            <artifactId>exec-maven-plugin</artifactId>
-            <executions>
-                <execution>
-                    <id>if_modified_files</id>
-                    <phase>verify</phase>
-                    <goals>
-                        <goal>exec</goal>
-                    </goals>
-                    <configuration>
-                        <executable>bash</executable>
-                        <commandlineArgs>-c 'export MODIFIED=$(git status --porcelain | grep "^ M"); [[ -z $MODIFIED ]] &amp;&amp; exit 0 || { echo -e "found modified files during mvn verify:\n$MODIFIED"; exit 1;}'</commandlineArgs>
-                    </configuration>
-                </execution>
-            </executions>
-        </plugin>
         <plugin>
             <groupId>org.apache.rat</groupId>
             <artifactId>apache-rat-plugin</artifactId>
@@ -208,4 +191,35 @@
         </plugin>
     </plugins>
   </build>
+
+  <profiles>
+    <profile>
+      <id>pre-merge</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>if_modified_files</id>
+                <phase>verify</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>bash</executable>
+                  <commandlineArgs>-c 'export MODIFIED=$(git status --porcelain | grep "^ M"); [[ -z $MODIFIED ]] &amp;&amp; exit 0 || { echo -e "found modified files during mvn verify:\n$MODIFIED"; exit 1;}'</commandlineArgs>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+    </profile>
+  </profiles>
+
 </project>

diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 456376149b9..9e66b97c770 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -37,7 +37,7 @@ export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"
 tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \
     rm -f $SPARK_HOME.tgz
 
-mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4
+mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4
 # Run the unit tests for other Spark versions but dont run full python integration tests
 env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test -Dpytest.TEST_TAGS=''
 env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark311tests,snapshot-shims test -Dpytest.TEST_TAGS=''
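
For reference, a minimal sketch (not part of the patch) of how the new `pre-merge` profile is exercised: `-P!snapshot-shims,pre-merge` deactivates the `snapshot-shims` profile and activates `pre-merge`, whose `if_modified_files` execution fails `mvn verify` if the build left tracked files modified.

```shell
# Activate the pre-merge profile so the if_modified_files check runs
# during `mvn verify` (the profile is not active by default):
mvn -U -B '-P!snapshot-shims,pre-merge' clean verify

# The check itself is equivalent to this standalone snippet:
MODIFIED=$(git status --porcelain | grep "^ M")
[[ -z $MODIFIED ]] && exit 0 || {
    echo -e "found modified files during mvn verify:\n$MODIFIED"
    exit 1
}
```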