diff --git a/api_validation/README.md b/api_validation/README.md index 189351846f0..8f544cd31aa 100644 --- a/api_validation/README.md +++ b/api_validation/README.md @@ -1,7 +1,9 @@ # API validation script for Rapids Plugin API validation script checks the compatibility of community Spark Execs and GPU Execs in the Rapids Plugin for Spark. -For example: HashAggregateExec with GpuHashAggregateExec. The script prints Execs where validation fails. +For example: HashAggregateExec with GpuHashAggregateExec. +The script can be used to audit different versions of Spark (3.0.0, 3.0.1-SNAPSHOT and 3.1.0-SNAPSHOT). +The script prints Execs where validation fails. Validation fails when: 1) The number of parameters differ between community Spark Execs and Gpu Execs. 2) Parameters to the exec have a type mismatch. @@ -15,7 +17,11 @@ It requires cudf, rapids-4-spark and spark jars. ``` cd api_validation -mvn scala:run +# To run the validation script on all versions of Spark (3.0.0, 3.0.1-SNAPSHOT and 3.1.0-SNAPSHOT): +sh auditAllVersions.sh + +# To run the script on a particular version, use a profile (spark300, spark301 or spark310): +mvn scala:run -P spark300 ``` # Output diff --git a/api_validation/auditAllVersions.sh b/api_validation/auditAllVersions.sh new file mode 100644 index 00000000000..33862ccff6e --- /dev/null +++ b/api_validation/auditAllVersions.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -ex + +mvn scala:run -P spark300 +mvn scala:run -P spark301 +mvn scala:run -P spark310 diff --git a/api_validation/pom.xml b/api_validation/pom.xml index 1c16f48ddc3..cc88767046e 100644 --- a/api_validation/pom.xml +++ b/api_validation/pom.xml @@ -27,6 +27,27 @@ rapids-4-spark-api-validation 0.2.0-SNAPSHOT + + + spark300 + + ${spark300.version} + + + + spark301 + + ${spark301.version} + + + + spark310 + + ${spark310.version} + + + + org.scala-lang @@ -35,6 +56,7 @@ org.apache.spark spark-sql_${scala.binary.version} + ${spark.version} org.scalatest @@ -53,6 +75,12 @@ ${project.version} provided + + com.nvidia + rapids-4-spark-shims-aggregator_${scala.binary.version} + 0.2.0-SNAPSHOT + provided + diff --git a/api_validation/src/main/scala/com/nvidia/spark/rapids/api/ApiValidation.scala b/api_validation/src/main/scala/com/nvidia/spark/rapids/api/ApiValidation.scala index a23b8d6c02d..607e08fbc5e 100644 --- a/api_validation/src/main/scala/com/nvidia/spark/rapids/api/ApiValidation.scala +++ b/api_validation/src/main/scala/com/nvidia/spark/rapids/api/ApiValidation.scala @@ -69,6 +69,16 @@ object ApiValidation extends Logging { val gpuKeys = gpuExecs.keys var printNewline = false + val sparkToShimMap = Map("3.0.0" -> "spark300", "3.0.1" -> "spark301", "3.1.0" -> "spark310") + val sparkVersion = ShimLoader.getSparkShims.getSparkShimVersion.toString + var shimVersion = sparkToShimMap(sparkVersion) + // There is no separate implementation for Execs in spark-3.0.1. 
+ shimVersion = if (shimVersion == "spark301") { + "spark300" + } else { + shimVersion + } + gpuKeys.foreach { e => // Get SparkExecs argNames and types val sparkTypes = classToTypeTag(e) @@ -83,13 +93,15 @@ object ApiValidation extends Logging { val execType = sparkTypes.tpe.toString.split('.').last val gpu = execType match { case "BroadcastExchangeExec" => s"org.apache.spark.sql.rapids.execution.Gpu" + execType - case "BroadcastHashJoinExec" => s"com.nvidia.spark.rapids.shims.spark300.Gpu" + execType - case "FileSourceScanExec" => s"org.apache.spark.sql.rapids.shims.spark300.Gpu" + execType + case "BroadcastHashJoinExec" => s"com.nvidia.spark.rapids.shims." + shimVersion + + ".Gpu" + execType + case "FileSourceScanExec" => s"org.apache.spark.sql.rapids.shims." + shimVersion + + ".Gpu" + execType case "CartesianProductExec" => s"org.apache.spark.sql.rapids.Gpu" + execType case "BroadcastNestedLoopJoinExec" => - s"com.nvidia.spark.rapids.shims.spark300.Gpu" + execType + s"com.nvidia.spark.rapids.shims." + shimVersion + ".Gpu" + execType case "SortMergeJoinExec" | "ShuffledHashJoinExec" => - s"com.nvidia.spark.rapids.shims.spark300.GpuShuffledHashJoinExec" + s"com.nvidia.spark.rapids.shims." + shimVersion + ".GpuShuffledHashJoinExec" case "SortAggregateExec" => s"com.nvidia.spark.rapids.GpuHashAggregateExec" case _ => s"com.nvidia.spark.rapids.Gpu" + execType }