Skip to content

Commit

Permalink
Fix 330 build error and add 322 shims layer [databricks] (#4447)
Browse files Browse the repository at this point in the history
* Add Shims 322

Signed-off-by: Chong Gao <res_life@163.com>

* Fix the 330 build errors

Signed-off-by: Firestarman <firestarmanllc@gmail.com>

* Add Shims 322

Signed-off-by: Chong Gao <res_life@163.com>

* Update doc

Signed-off-by: Chong Gao <res_life@163.com>

* Update comments

Signed-off-by: Chong Gao <res_life@163.com>

* Fix the build errors related to partitioning

Signed-off-by: Firestarman <firestarmanllc@gmail.com>

* Correct the import order

Signed-off-by: Firestarman <firestarmanllc@gmail.com>

* Add 322 version in the Jenkins version def file

Signed-off-by: Chong Gao <res_life@163.com>

* Refactor

Signed-off-by: Chong Gao <res_life@163.com>

Co-authored-by: Firestarman <firestarmanllc@gmail.com>
  • Loading branch information
Chong Gao and firestarman authored Jan 4, 2022
1 parent 5250808 commit 9281caa
Show file tree
Hide file tree
Showing 44 changed files with 1,863 additions and 111 deletions.
1 change: 1 addition & 0 deletions build/buildall
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ case $DIST_PROFILE in
313
320
321
322
330
)
;;
Expand Down
1 change: 1 addition & 0 deletions dist/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
304,
313,
321,
322,
330
</snapshot.buildvers>
<databricks.buildvers>
Expand Down
1 change: 1 addition & 0 deletions docs/additional-functionality/rapids-shuffle.md
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ In this section, we are using a docker container built using the sample dockerfi
| 3.1.3 | com.nvidia.spark.rapids.spark313.RapidsShuffleManager |
| 3.2.0 | com.nvidia.spark.rapids.spark320.RapidsShuffleManager |
| 3.2.1 | com.nvidia.spark.rapids.spark321.RapidsShuffleManager |
| 3.2.2 | com.nvidia.spark.rapids.spark322.RapidsShuffleManager |
| 3.3.0 | com.nvidia.spark.rapids.spark330.RapidsShuffleManager |
| Databricks 7.3| com.nvidia.spark.rapids.spark301db.RapidsShuffleManager |
| Databricks 9.1| com.nvidia.spark.rapids.spark312db.RapidsShuffleManager |
Expand Down
1 change: 1 addition & 0 deletions jenkins/spark-premerge-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ mvn_verify() {
# don't skip tests
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=320 clean install -Drat.skip=true -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -Dpytest.TEST_TAGS='' -pl '!tools'
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=321 clean install -Drat.skip=true -DskipTests -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -pl aggregator -am
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=322 clean install -Drat.skip=true -DskipTests -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -pl aggregator -am
env -u SPARK_HOME mvn -U -B $MVN_URM_MIRROR -Dbuildver=330 clean install -Drat.skip=true -DskipTests -Dmaven.javadoc.skip=true -Dskip -Dmaven.scalastyle.skip=true -Dcuda.version=$CUDA_CLASSIFIER -pl aggregator -am

# Here run Python integration tests tagged with 'premerge_ci_1' only, that would help balance test duration and memory
Expand Down
2 changes: 1 addition & 1 deletion jenkins/version-def.sh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ echo "CUDF_VER: $CUDF_VER, CUDA_CLASSIFIER: $CUDA_CLASSIFIER, PROJECT_VER: $PROJ
SPARK_VER: $SPARK_VER, SCALA_BINARY_VER: $SCALA_BINARY_VER"


SPARK_SHIM_VERSIONS_STR=${SPARK_SHIM_VERSIONS_STR:-"301 302 303 304 311 311cdh 312 313 320 321 330"}
SPARK_SHIM_VERSIONS_STR=${SPARK_SHIM_VERSIONS_STR:-"301 302 303 304 311 311cdh 312 313 320 321 322 330"}

IFS=" " <<< $SPARK_SHIM_VERSIONS_STR read -r -a SPARK_SHIM_VERSIONS

Expand Down
56 changes: 56 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,7 @@
<source>${project.basedir}/src/main/311until320-all/scala</source>
<source>${project.basedir}/src/main/311until320-noncdh/scala</source>
<source>${project.basedir}/src/main/311until320-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/pre320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -405,6 +406,7 @@
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311until320-noncdh/scala</source>
<source>${project.basedir}/src/main/31xdb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/post320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -450,6 +452,7 @@
<source>${project.basedir}/src/main/311until320-all/scala</source>
<source>${project.basedir}/src/main/311until320-noncdh/scala</source>
<source>${project.basedir}/src/main/311until320-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/pre320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -498,6 +501,7 @@
<source>${project.basedir}/src/main/311until320-all/scala</source>
<source>${project.basedir}/src/main/311until320-noncdh/scala</source>
<source>${project.basedir}/src/main/311until320-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/pre320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -541,7 +545,9 @@
<source>${project.basedir}/src/main/301until330-all/scala</source>
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
<source>${project.basedir}/src/main/320until322/scala</source>
<source>${project.basedir}/src/main/post320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -584,7 +590,54 @@
<source>${project.basedir}/src/main/301until330-all/scala</source>
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
<source>${project.basedir}/src/main/320until322/scala</source>
<source>${project.basedir}/src/main/post320-treenode/scala</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<modules>
<module>tools</module>
<module>aggregator</module>
<module>tests-spark310+</module>
</modules>
</profile>
<profile>
<id>release322</id>
<activation>
<property>
<name>buildver</name>
<value>322</value>
</property>
</activation>
<properties>
<spark.version>${spark322.version}</spark.version>
<spark.test.version>${spark322.version}</spark.test.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<executions>
<execution>
<id>add-profile-src-31+</id>
<goals><goal>add-source</goal></goals>
<phase>generate-sources</phase>
<configuration>
<sources>
<source>${project.basedir}/src/main/301+-nondb/scala</source>
<source>${project.basedir}/src/main/301until330-all/scala</source>
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
<source>${project.basedir}/src/main/322+/scala</source>
<source>${project.basedir}/src/main/post320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -627,6 +680,7 @@
<source>${project.basedir}/src/main/311+-all/scala</source>
<source>${project.basedir}/src/main/311+-nondb/scala</source>
<source>${project.basedir}/src/main/320+/scala</source>
<source>${project.basedir}/src/main/322+/scala</source>
<source>${project.basedir}/src/main/330+/scala</source>
<source>${project.basedir}/src/main/post320-treenode/scala</source>
</sources>
Expand Down Expand Up @@ -675,6 +729,7 @@
<source>${project.basedir}/src/main/311cdh/scala</source>
<source>${project.basedir}/src/main/311until320-all/scala</source>
<source>${project.basedir}/src/main/311until320-nondb/scala</source>
<source>${project.basedir}/src/main/311until330-all/scala</source>
<source>${project.basedir}/src/main/pre320-treenode/scala</source>
</sources>
</configuration>
Expand Down Expand Up @@ -783,6 +838,7 @@
<spark313.version>3.1.3-SNAPSHOT</spark313.version>
<spark320.version>3.2.0</spark320.version>
<spark321.version>3.2.1-SNAPSHOT</spark321.version>
<spark322.version>3.2.2-SNAPSHOT</spark322.version>
<spark330.version>3.3.0-SNAPSHOT</spark330.version>
<mockito.version>3.6.0</mockito.version>
<scala.plugin.version>4.3.0</scala.plugin.version>
Expand Down
4 changes: 4 additions & 0 deletions shims/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@
<id>release321</id>
<modules><module>spark321</module></modules>
</profile>
<profile>
<id>release322</id>
<modules><module>spark322</module></modules>
</profile>
</profiles>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark301Shims extends Spark30XShims with Spark30Xuntil33XShims {

Expand All @@ -35,7 +34,8 @@ class Spark301Shims extends Spark30XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark302Shims extends Spark30XShims with Spark30Xuntil33XShims {

Expand All @@ -35,7 +34,8 @@ class Spark302Shims extends Spark30XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark303Shims extends Spark30XShims with Spark30Xuntil33XShims {

Expand All @@ -35,7 +34,8 @@ class Spark303Shims extends Spark30XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,8 +20,8 @@ import com.nvidia.spark.rapids.ShimVersion
import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.DataSourceUtils
import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark304Shims extends Spark30XShims with Spark30Xuntil33XShims {

Expand All @@ -35,7 +35,11 @@ class Spark304Shims extends Spark30XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters =
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
val datetimeRebaseMode = DataSourceUtils
.datetimeRebaseMode(lookupFileMeta, dateTimeRebaseModeFromConf)
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive, datetimeRebaseMode)
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark311Shims extends Spark31XShims with Spark30Xuntil33XShims {

Expand All @@ -37,7 +36,8 @@ class Spark311Shims extends Spark31XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark311CDHShims extends Spark31XShims with Spark30Xuntil33XShims {

Expand All @@ -37,7 +36,8 @@ class Spark311CDHShims extends Spark31XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -21,7 +21,6 @@ import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark312Shims extends Spark31XShims with Spark30Xuntil33XShims {

Expand All @@ -37,7 +36,8 @@ class Spark312Shims extends Spark31XShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters = {
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,8 +20,8 @@ import com.nvidia.spark.rapids._
import com.nvidia.spark.rapids.shims.v2._
import org.apache.parquet.schema.MessageType

import org.apache.spark.sql.execution.datasources.DataSourceUtils
import org.apache.spark.sql.execution.datasources.parquet.ParquetFilters
import org.apache.spark.sql.internal.SQLConf

class Spark312dbShims extends Spark31XdbShims with Spark30Xuntil33XShims {

Expand All @@ -35,7 +35,11 @@ class Spark312dbShims extends Spark31XdbShims with Spark30Xuntil33XShims {
pushDownStartWith: Boolean,
pushDownInFilterThreshold: Int,
caseSensitive: Boolean,
datetimeRebaseMode: SQLConf.LegacyBehaviorPolicy.Value): ParquetFilters =
lookupFileMeta: String => String,
dateTimeRebaseModeFromConf: String): ParquetFilters = {
val datetimeRebaseMode = DataSourceUtils
.datetimeRebaseMode(lookupFileMeta, dateTimeRebaseModeFromConf)
new ParquetFilters(schema, pushDownDate, pushDownTimestamp, pushDownDecimal, pushDownStartWith,
pushDownInFilterThreshold, caseSensitive, datetimeRebaseMode)
}
}
Loading

0 comments on commit 9281caa

Please sign in to comment.