Create non-shim specific version of ParquetCachedBatchSerializer #3473

Merged · 18 commits · Sep 14, 2021
1 change: 1 addition & 0 deletions dist/README.md
@@ -27,3 +27,4 @@ If you have to change the contents of the uber jar the following files control w

1. `unshimmed-base.txt` - this has classes and files that should go into the base jar with their normal package name (not shaded). This includes user visible classes (i.e. com/nvidia/spark/SQLPlugin), python files, and other files that aren't version specific. The Spark 3.0.1 built jar is used for these base classes.
2. `unshimmed-extras.txt` - This is applied to all of the individual Spark version jars to pull any files that need to go into the base of the jar rather than into a Spark specific directory.
3. `unshimmed-spark311.txt` - This is applied to the Spark 3.1.1 specific version jar to pull any files that need to go into the base of the jar rather than into the Spark specific directory.
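
Once the dist jar is built, a quick way to confirm the new layout is to list the jar and check that the serializer classes sit at the jar root instead of only under a `spark311/` prefix. A minimal sketch, assuming a locally built jar (the path and version below are placeholders):

```
# Placeholder path/version; point this at the actual dist build output.
DIST_JAR="dist/target/rapids-4-spark_2.12-21.10.0-SNAPSHOT.jar"

# Classes pulled in via unshimmed-spark311.txt should appear unshimmed at the jar root...
jar tf "$DIST_JAR" | grep '^com/nvidia/spark/ParquetCachedBatchSerializer'
jar tf "$DIST_JAR" | grep '^com/nvidia/spark/GpuCachedBatchSerializer'

# ...while the shimmed implementation classes stay under the per-Spark parallel worlds.
jar tf "$DIST_JAR" | grep '^spark311/' | head
```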
51 changes: 41 additions & 10 deletions dist/pom.xml
@@ -245,13 +245,19 @@
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/"
>
<patternset refid="includeMeta"/>
<patternset id="includes-spark311">
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/spark311"
>
<patternset refid="excludeMeta"/>
<patternset id="excludes-spark311">
<excludesfile name="${project.basedir}/unshimmed-extras.txt"/>
<excludesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
@@ -516,13 +522,19 @@
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/"
>
<patternset refid="includeMeta"/>
<patternset id="includes-spark311">
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/spark311"
>
<patternset refid="excludeMeta"/>
<patternset id="excludes-spark311">
<excludesfile name="${project.basedir}/unshimmed-extras.txt"/>
<excludesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
@@ -728,13 +740,19 @@
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
dest="${project.build.directory}/parallel-world/"
>
<patternset refid="includeMeta"/>
<patternset id="includes-spark311">
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
dest="${project.build.directory}/parallel-world/spark312"
>
<patternset refid="excludeMeta"/>
<patternset id="excludes-spark311">
<excludesfile name="${project.basedir}/unshimmed-extras.txt"/>
<excludesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>

<unzip
@@ -1014,13 +1032,19 @@
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/"
>
<patternset refid="includeMeta"/>
<patternset id="includes-spark311">
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/spark311"
>
<patternset refid="excludeMeta"/>
<patternset id="excludes-spark311">
<excludesfile name="${project.basedir}/unshimmed-extras.txt"/>
<excludesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
@@ -1342,13 +1366,19 @@
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/"
>
<patternset refid="includeMeta"/>
<patternset id="includes-spark311">
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark311.jar"
dest="${project.build.directory}/parallel-world/spark311"
>
<patternset refid="excludeMeta"/>
<patternset id="excludes-spark311">
<excludesfile name="${project.basedir}/unshimmed-extras.txt"/>
<excludesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
src="${project.build.directory}/deps/rapids-4-spark-aggregator_${scala.binary.version}-${project.version}-spark312.jar"
@@ -1622,6 +1652,7 @@
<patternset id="sharedWorld">
<includesfile name="${project.basedir}/unshimmed-base.txt"/>
<includesfile name="${project.basedir}/unshimmed-extras.txt"/>
<includesfile name="${project.basedir}/unshimmed-spark311.txt"/>
</patternset>
</unzip>
<unzip
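For readers less familiar with Ant, a rough shell analogue of the `<unzip>` pairing above may help; this is only a sketch (paths and the version string are placeholders, and the real build relies on the Ant patternsets shown in `dist/pom.xml`):

```
# Keep the '*' entries in the unshimmed lists literal instead of letting the shell glob them.
set -f
AGG_JAR="dist/target/deps/rapids-4-spark-aggregator_2.12-21.10.0-SNAPSHOT-spark311.jar"
UNSHIMMED=$(cat dist/unshimmed-extras.txt dist/unshimmed-spark311.txt)

# Entries matching the unshimmed lists land at the root of the parallel world...
unzip -o "$AGG_JAR" $UNSHIMMED -d dist/target/parallel-world/

# ...everything else stays under the Spark 3.1.1 specific directory.
unzip -o "$AGG_JAR" -x $UNSHIMMED -d dist/target/parallel-world/spark311
```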
2 changes: 2 additions & 0 deletions dist/unshimmed-spark311.txt
@@ -0,0 +1,2 @@
com/nvidia/spark/ParquetCachedBatchSerializer*
com/nvidia/spark/GpuCachedBatchSerializer*
2 changes: 1 addition & 1 deletion docs/additional-functionality/cache-serializer.md
@@ -37,7 +37,7 @@ nav_order: 2

To use this serializer please run Spark with the following conf.
```
spark-shell --conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer"
spark-shell --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer"
```
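
As a quick end-to-end check of the new non-shim class name, a small cached query can be pushed through spark-shell. A minimal sketch, assuming Spark 3.1.1+ with the RAPIDS Accelerator jar already on the driver and executor classpath (the dataset is made up for illustration):

```
spark-shell \
  --conf spark.plugins=com.nvidia.spark.SQLPlugin \
  --conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer <<'EOF'
// Cache a small DataFrame so the serializer is actually exercised.
val df = spark.range(0, 1000).selectExpr("id", "id * 2 AS doubled")
df.cache()
df.count()                           // materializes the cache
df.filter("doubled > 100").count()   // reads back from the cached batches
EOF
```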


20 changes: 9 additions & 11 deletions jenkins/databricks/test.sh
@@ -59,17 +59,16 @@ IS_SPARK_311_OR_LATER=0
[[ "$(printf '%s\n' "3.1.1" "$BASE_SPARK_VER" | sort -V | head -n1)" = "3.1.1" ]] && IS_SPARK_311_OR_LATER=1

TEST_TYPE="nightly"
PCBS_CONF="com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer"
PCBS_CONF="com.nvidia.spark.ParquetCachedBatchSerializer"
if [ -d "$LOCAL_JAR_PATH" ]; then
## Run tests with jars in the LOCAL_JAR_PATH dir downloaded from the dependency repo
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
## Run cache tests
#if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
# PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
# LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
#fi
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
LOCAL_JAR_PATH=$LOCAL_JAR_PATH bash $LOCAL_JAR_PATH/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
fi

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls $LOCAL_JAR_PATH/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
@@ -80,12 +79,11 @@ else
## Run tests with jars building from the spark-rapids source code
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
## Run cache tests
#if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
# PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
# bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
#fi
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
bash /home/ubuntu/spark-rapids/integration_tests/run_pyspark_from_build.sh --runtime_env="databricks" --test_type=$TEST_TYPE -k cache_test
fi

## Run cudf-udf tests
CUDF_UDF_TEST_ARGS="$CUDF_UDF_TEST_ARGS --conf spark.executorEnv.PYTHONPATH=`ls /home/ubuntu/spark-rapids/dist/target/rapids-4-spark_*.jar | grep -v 'tests.jar'`"
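The `PYSP_TEST_spark_sql_cache_serializer` lines above lean on the integration-test harness convention that environment variables prefixed with `PYSP_TEST_` are turned into Spark confs (underscores becoming dots), so the gated runs are expected to behave roughly like the sketch below; treat the exact flag plumbing as an assumption:

```
# Assumed equivalence: the PYSP_TEST_ prefix is stripped and '_' becomes '.',
# so this env var should surface as --conf spark.sql.cache.serializer=... inside the tests.
PCBS_CONF="com.nvidia.spark.ParquetCachedBatchSerializer"

PYSP_TEST_spark_sql_cache_serializer=${PCBS_CONF} \
  bash ./integration_tests/run_pyspark_from_build.sh \
    --runtime_env="databricks" --test_type=nightly -k cache_test
```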
7 changes: 2 additions & 5 deletions jenkins/spark-tests.sh
@@ -69,9 +69,6 @@ IS_SPARK_311_OR_LATER=0
export SPARK_TASK_MAXFAILURES=1
[[ "$IS_SPARK_311_OR_LATER" -eq "0" ]] && SPARK_TASK_MAXFAILURES=4

IS_SPARK_311=0
[[ "$SPARK_VER" == "3.1.1" ]] && IS_SPARK_311=1

export PATH="$SPARK_HOME/bin:$SPARK_HOME/sbin:$PATH"

#stop and restart SPARK ETL
@@ -138,7 +135,7 @@ run_test() {

cache_serializer)
SPARK_SUBMIT_FLAGS="$BASE_SPARK_SUBMIT_ARGS $SEQ_CONF \
--conf spark.sql.cache.serializer=com.nvidia.spark.rapids.shims.spark311.ParquetCachedBatchSerializer" \
--conf spark.sql.cache.serializer=com.nvidia.spark.ParquetCachedBatchSerializer" \
./run_pyspark_from_build.sh -k cache_test
;;

@@ -179,7 +176,7 @@ fi
run_test cudf_udf_test

# Temporarily only run on Spark 3.1.1 (https://github.com/NVIDIA/spark-rapids/issues/3311)
if [[ "$IS_SPARK_311" -eq "1" ]]; then
if [[ "$IS_SPARK_311_OR_LATER" -eq "1" ]]; then
run_test cache_serializer
fi

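Both test scripts gate the cache tests on `IS_SPARK_311_OR_LATER`, which jenkins/databricks/test.sh computes with the `sort -V` version-comparison idiom. A small self-contained sketch of the same check (the helper name and sample value are made up for illustration):

```
# Succeeds when $1 >= $2 under version ordering -- the same sort -V trick the scripts use.
version_ge() {
  [ "$(printf '%s\n' "$2" "$1" | sort -V | head -n1)" = "$2" ]
}

BASE_SPARK_VER=3.1.2   # illustrative value
if version_ge "$BASE_SPARK_VER" "3.1.1"; then
  echo "Spark 3.1.1 or later: run cache_test with ParquetCachedBatchSerializer"
fi
```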