From 468ea1a20a4cdff4717d5dee874ef9a92b72ed20 Mon Sep 17 00:00:00 2001
From: Sameer Raheja
Date: Thu, 25 Feb 2021 18:03:58 -0800
Subject: [PATCH 1/2] Remove SNAPSHOT from test and integration_test READMEs
 (#1780)

Signed-off-by: Sameer Raheja
---
 integration_tests/README.md | 8 ++++----
 tests/README.md             | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/integration_tests/README.md b/integration_tests/README.md
index 468d405b79e..7ae2b7d11ec 100644
--- a/integration_tests/README.md
+++ b/integration_tests/README.md
@@ -107,7 +107,7 @@ individually, so you don't risk running unit tests along with the integration te
 http://www.scalatest.org/user_guide/using_the_scalatest_shell
 
 ```shell
-spark-shell --jars rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT-tests.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT,rapids-4-spark-integration-tests_2.12-0.4.0-SNAPSHOT-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar
+spark-shell --jars rapids-4-spark-tests_2.12-0.4.0-tests.jar,rapids-4-spark-udf-examples_2.12-0.4.0,rapids-4-spark-integration-tests_2.12-0.4.0-tests.jar,scalatest_2.12-3.0.5.jar,scalactic_2.12-3.0.5.jar
 ```
 
 First you import the `scalatest_shell` and tell the tests where they can find the test files you
@@ -131,7 +131,7 @@ If you just want to verify the SQL replacement is working you will need to add t
 example assumes CUDA 10.1 is being used.
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar" ./runtests.py
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar" ./runtests.py
 ```
 
 You don't have to enable the plugin for this to work, the test framework will do that for you.
@@ -183,7 +183,7 @@ The TPCxBB, TPCH, TPCDS, and Mortgage tests in this framework can be enabled by
 As an example, here is the `spark-submit` command with the TPCxBB parameters on CUDA 10.1:
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT.jar" ./runtests.py --tpcxbb_format="csv" --tpcxbb_path="/path/to/tpcxbb/csv"
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" ./runtests.py --tpcxbb_format="csv" --tpcxbb_path="/path/to/tpcxbb/csv"
 ```
 
 Be aware that running these tests with read data requires at least an entire GPU, and preferable several GPUs/executors
@@ -212,7 +212,7 @@ To run cudf_udf tests, need following configuration changes:
 As an example, here is the `spark-submit` command with the cudf_udf parameter on CUDA 10.1:
 
 ```
-$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar,rapids-4-spark-udf-examples_2.12-0.4.0-SNAPSHOT.jar,cudf-0.18-SNAPSHOT-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0-SNAPSHOT.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0-SNAPSHOT.jar" ./runtests.py --cudf_udf
+$SPARK_HOME/bin/spark-submit --jars "rapids-4-spark_2.12-0.4.0.jar,rapids-4-spark-udf-examples_2.12-0.4.0.jar,cudf-0.18-cuda10-1.jar,rapids-4-spark-tests_2.12-0.4.0.jar" --conf spark.rapids.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.memory.gpu.allocFraction=0.3 --conf spark.rapids.python.concurrentPythonWorkers=2 --py-files "rapids-4-spark_2.12-0.4.0.jar" --conf spark.executorEnv.PYTHONPATH="rapids-4-spark_2.12-0.4.0.jar" ./runtests.py --cudf_udf
 ```
 
 ## Writing tests

diff --git a/tests/README.md b/tests/README.md
index 9d50c0ba603..8b503d1c685 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -20,7 +20,7 @@ we typically run with the default options and only increase the scale factor dep
 dbgen -b dists.dss -s 10
 ```
 
-You can include the test jar `rapids-4-spark-integration-tests_2.12-0.4.0-SNAPSHOT.jar` with the
+You can include the test jar `rapids-4-spark-integration-tests_2.12-0.4.0.jar` with the
 Spark --jars option to get the TPCH tests. To setup for the queries you can run
 `TpchLikeSpark.setupAllCSV` for CSV formatted data or `TpchLikeSpark.setupAllParquet`
 for parquet formatted data. Both of those take the Spark session, and a path to the dbgen

From 9887a434591a776611dff8f20c18148995c14aeb Mon Sep 17 00:00:00 2001
From: pxLi
Date: Fri, 26 Feb 2021 11:02:26 +0800
Subject: [PATCH 2/2] make modified check pre-merge only (#1820)

Signed-off-by: Peixin Li
---
 dist/pom.xml                    | 48 +++++++++++++++++++++------------
 jenkins/spark-premerge-build.sh |  2 +-
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/dist/pom.xml b/dist/pom.xml
index 700d1b5ebea..f0b212aa5b8 100644
--- a/dist/pom.xml
+++ b/dist/pom.xml
@@ -179,23 +179,6 @@
                 </execution>
             </executions>
         </plugin>
-        <plugin>
-            <groupId>org.codehaus.mojo</groupId>
-            <artifactId>exec-maven-plugin</artifactId>
-            <executions>
-                <execution>
-                    <id>if_modified_files</id>
-                    <phase>verify</phase>
-                    <goals>
-                        <goal>exec</goal>
-                    </goals>
-                    <configuration>
-                        <executable>bash</executable>
-                        <commandlineArgs>-c 'export MODIFIED=$(git status --porcelain | grep "^ M"); [[ -z $MODIFIED ]] &amp;&amp; exit 0 || { echo -e "found modified files during mvn verify:\n$MODIFIED"; exit 1;}'</commandlineArgs>
-                    </configuration>
-                </execution>
-            </executions>
-        </plugin>
         <plugin>
             <groupId>org.apache.rat</groupId>
             <artifactId>apache-rat-plugin</artifactId>
@@ -208,4 +191,35 @@
         </plugin>
     </plugins>
   </build>
+
+  <profiles>
+    <profile>
+      <id>pre-merge</id>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>exec-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>if_modified_files</id>
+                <phase>verify</phase>
+                <goals>
+                  <goal>exec</goal>
+                </goals>
+                <configuration>
+                  <executable>bash</executable>
+                  <commandlineArgs>-c 'export MODIFIED=$(git status --porcelain | grep "^ M"); [[ -z $MODIFIED ]] &amp;&amp; exit 0 || { echo -e "found modified files during mvn verify:\n$MODIFIED"; exit 1;}'</commandlineArgs>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
+      <activation>
+        <activeByDefault>false</activeByDefault>
+      </activation>
+    </profile>
+  </profiles>
+
 </project>

diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 456376149b9..9e66b97c770 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -37,7 +37,7 @@ export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"
 tar zxf $SPARK_HOME.tgz -C $ARTF_ROOT && \
     rm -f $SPARK_HOME.tgz
 
-mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4
+mvn -U -B $MVN_URM_MIRROR '-P!snapshot-shims,pre-merge' clean verify -Dpytest.TEST_TAGS='' -Dpytest.TEST_TYPE="pre-commit" -Dpytest.TEST_PARALLEL=4
 # Run the unit tests for other Spark versions but dont run full python integration tests
 env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark301tests,snapshot-shims test -Dpytest.TEST_TAGS=''
 env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Pspark311tests,snapshot-shims test -Dpytest.TEST_TAGS=''
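
For reference, a minimal sketch (not part of the patch) of how the new `pre-merge` profile is exercised: `-P!snapshot-shims,pre-merge` deactivates the `snapshot-shims` profile and activates `pre-merge`, whose `if_modified_files` execution fails `mvn verify` if the build left tracked files modified.

```shell
# Activate the pre-merge profile so the if_modified_files check runs
# during `mvn verify` (the profile is not active by default):
mvn -U -B '-P!snapshot-shims,pre-merge' clean verify

# The check itself is equivalent to this standalone snippet:
MODIFIED=$(git status --porcelain | grep "^ M")
[[ -z $MODIFIED ]] && exit 0 || {
    echo -e "found modified files during mvn verify:\n$MODIFIED"
    exit 1
}
```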