Batch scalastyle checks across all modules upfront (#3433)
- replaces scalastyle-maven-plugin with an Ant task that uses compact wildcard paths, so we don't need to hardcode the evolving source code layout (a rough CLI equivalent is sketched after the change summary below)
- moves build info generation into pluginManagement so submodules inherit it, while limiting the scalastyle task to the parent pom only
- checks all modules in one pass upfront, so developers no longer wait for style failures that used to surface only after lengthy Scala builds
- fixes code style violations accumulated while the check was incomplete
    
Fixes #3388
    
Signed-off-by: Gera Shegalov <gera@apache.org>
gerashegalov authored Sep 11, 2021
1 parent 1e62e02 commit 906af85
Showing 32 changed files with 86 additions and 139 deletions.
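
For orientation, the new antrun execution in the parent pom boils down to a single scalastyle CLI pass over every module's Scala source roots. Below is a rough shell equivalent of the <java> task added in pom.xml (illustrative only: the jar name and Scala binary version are assumptions; the real build resolves scalastyle_${scala.binary.version} as a plugin dependency, and Ant's "**" wildcards don't map one-to-one onto find patterns):

    # Approximate shell equivalent of the scalastyle-all-modules Ant task.
    # Assumption: a scalastyle_2.12-1.0.0 jar and its dependencies are available
    # locally; the Maven build fetches them as plugin dependencies instead.
    SCALA_DIRS=$(find . -type d \
        \( -path '*/src/main/scala' -o -path '*/src/main/*/scala' \
           -o -path '*/src/test/scala' -o -path '*/src/test/*/scala' \))
    java -cp scalastyle_2.12-1.0.0.jar org.scalastyle.Main \
        --verbose false --warnings false \
        --config scalastyle-config.xml \
        --xmlOutput target/scalastyle-output.xml \
        $SCALA_DIRS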
1 change: 1 addition & 0 deletions dist/pom.xml
@@ -1737,6 +1737,7 @@
<configuration>
<excludes>
<exclude>com.nvidia.spark.rapids.SparkShimServiceProvider.*</exclude>
+<exclude>dependency-reduced-pom.xml</exclude>
<exclude>*.txt</exclude>
<exclude>*.md</exclude>
</excludes>
7 changes: 0 additions & 7 deletions integration_tests/pom.xml
@@ -238,13 +238,6 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest-maven-plugin</artifactId>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-<configuration>
-<includeTestSourceDirectory>true</includeTestSourceDirectory>
-</configuration>
-</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
96 changes: 60 additions & 36 deletions pom.xml
@@ -801,6 +801,27 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<version>1.8</version>
+<executions>
+<execution>
+<id>generate-build-info</id>
+<phase>generate-resources</phase>
+<configuration>
+<!-- Execute the shell script to generate the plugin build information. -->
+<target>
+<mkdir dir="${project.build.directory}/extra-resources"/>
+<mkdir dir="${project.build.directory}/tmp"/>
+<exec executable="bash" failonerror="true" output="${project.build.directory}/extra-resources/rapids4spark-version-info.properties">
+<arg value="${user.dir}/build/build-info"/>
+<arg value="${project.version}"/>
+<arg value="${cudf.version}"/>
+</exec>
+</target>
+</configuration>
+<goals>
+<goal>run</goal>
+</goals>
+</execution>
+</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -901,30 +922,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-<version>1.0.0</version>
-<configuration>
-<verbose>false</verbose>
-<failOnViolation>true</failOnViolation>
-<includeTestSourceDirectory>false</includeTestSourceDirectory>
-<failOnWarning>false</failOnWarning>
-<sourceDirectory>${basedir}/src/main/scala</sourceDirectory>
-<testSourceDirectory>${basedir}/src/test/scala</testSourceDirectory>
-<configLocation>scalastyle-config.xml</configLocation>
-<outputFile>${basedir}/target/scalastyle-output.xml</outputFile>
-<inputEncoding>${project.build.sourceEncoding}</inputEncoding>
-<outputEncoding>${project.reporting.outputEncoding}</outputEncoding>
-</configuration>
-<executions>
-<execution>
-<goals>
-<goal>check</goal>
-</goals>
-</execution>
-</executions>
-</plugin>
<plugin>
<groupId>org.apache.rat</groupId>
<artifactId>apache-rat-plugin</artifactId>
@@ -1001,28 +998,55 @@
</configuration>
</plugin>
<plugin>
+<!--
+This is an alternative implementation of the scalastyle check invocation,
+a replacement for scalastyle-maven-plugin. It's motivated to address the following:
+- All scala files are checked at once regardless of the module, so the developer
+  can focus on addressing violations without being distracted by the build issues
+  in-between
+- We don't have to hardcode the source code roots added dynamically by other maven
+  plugins to the project
+- The scalastyle launch cost is amortized across all modules
+-->
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
+<inherited>false</inherited>
<executions>
<execution>
-<phase>generate-resources</phase>
+<id>scalastyle-all-modules</id>
+<phase>verify</phase>
+<goals><goal>run</goal></goals>
<configuration>
-<!-- Execute the shell script to generate the plugin build information. -->
<target>
-<mkdir dir="${project.build.directory}/extra-resources"/>
-<mkdir dir="${project.build.directory}/tmp"/>
-<exec executable="bash" failonerror="true" output="${project.build.directory}/extra-resources/rapids4spark-version-info.properties">
-<arg value="${user.dir}/build/build-info"/>
-<arg value="${project.version}"/>
-<arg value="${cudf.version}"/>
-</exec>
+<pathconvert property="scalastyle.dirs" pathsep=" ">
+<dirset dir="${project.basedir}" includes="**/src/main/scala"/>
+<dirset dir="${project.basedir}" includes="**/src/main/*/scala"/>
+<dirset dir="${project.basedir}" includes="**/src/test/scala"/>
+<dirset dir="${project.basedir}" includes="**/src/test/*/scala"/>
+</pathconvert>
+<echo>Checking scalastyle for all modules using following paths:
+${scalastyle.dirs}
+</echo>
+<java classname="org.scalastyle.Main" failonerror="true">
+<arg line="--verbose false"/>
+<arg line="--warnings false"/>
+<arg line="--config scalastyle-config.xml"/>
+<arg line="--xmlOutput ${project.basedir}/target/scalastyle-output.xml"/>
+<arg line="--inputEncoding ${project.build.sourceEncoding}"/>
+<arg line="--xmlEncoding ${project.reporting.outputEncoding}"/>
+<arg line="${scalastyle.dirs}"/>
+</java>
</target>
</configuration>
-<goals>
-<goal>run</goal>
-</goals>
</execution>
</executions>
+<dependencies>
+<dependency>
+<groupId>org.scalastyle</groupId>
+<artifactId>scalastyle_${scala.binary.version}</artifactId>
+<version>1.0.0</version>
+</dependency>
+</dependencies>
</plugin>
<plugin>
<groupId>org.jacoco</groupId>
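
Because the new execution is marked <inherited>false</inherited> and bound to the verify phase, the batched check fires exactly once per reactor build, from the parent pom. To run just the style check without sitting through compilation, an invocation along these lines should work (hypothetical, not documented by this commit; goal@execution-id addressing requires Maven 3.3.1+):

    # Hypothetical: trigger only the batched scalastyle execution from the repo root.
    # -N (--non-recursive) stays in the parent pom, where the check lives.
    mvn -N antrun:run@scalastyle-all-modules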
4 changes: 0 additions & 4 deletions shims/spark301/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
@@ -25,7 +25,9 @@ import org.apache.spark.sql.rapids.execution.GpuBroadcastExchangeExecBaseWithFut

case class GpuBroadcastExchangeExec(
override val mode: BroadcastMode,
-child: SparkPlan) extends GpuBroadcastExchangeExecBaseWithFuture(mode, child) with BroadcastExchangeLike {
+child: SparkPlan)
+extends GpuBroadcastExchangeExecBaseWithFuture(mode, child)
+with BroadcastExchangeLike {

override def runId: UUID = _runId

@@ -28,9 +28,11 @@ class GpuRunningWindowExecMeta(runningWindowFunctionExec: RunningWindowFunctionE
conf: RapidsConf,
parent: Option[RapidsMeta[_, _, _]],
rule: DataFromReplacementRule)
-extends GpuBaseWindowExecMeta[RunningWindowFunctionExec](runningWindowFunctionExec, conf, parent, rule) {
+extends GpuBaseWindowExecMeta[RunningWindowFunctionExec](runningWindowFunctionExec, conf,
+parent, rule) {

-override def getInputWindowExpressions: Seq[NamedExpression] = runningWindowFunctionExec.windowExpressionList
+override def getInputWindowExpressions: Seq[NamedExpression] =
+runningWindowFunctionExec.windowExpressionList
override def getPartitionSpecs: Seq[Expression] = runningWindowFunctionExec.partitionSpec
override def getOrderSpecs: Seq[SortOrder] = runningWindowFunctionExec.orderSpec
override def getResultColumnsOnly: Boolean = true
@@ -1,5 +1,5 @@
/*
-* Copyright (c) 2020, NVIDIA CORPORATION.
+* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -26,7 +26,8 @@ case class GpuShuffleExchangeExec(
override val outputPartitioning: Partitioning,
child: SparkPlan,
canChangeNumPartitions: Boolean)
-extends GpuShuffleExchangeExecBaseWithMetrics(outputPartitioning, child) with ShuffleExchangeLike {
+extends GpuShuffleExchangeExecBaseWithMetrics(outputPartitioning, child)
+with ShuffleExchangeLike {

override def numMappers: Int = shuffleDependencyColumnar.rdd.getNumPartitions

@@ -62,8 +62,8 @@ class ProxyRapidsShuffleInternalManager(conf: SparkConf, isDriver: Boolean)
endPartition: Int,
context: TaskContext,
metrics: ShuffleReadMetricsReporter): ShuffleReader[K, C] = {
-self.getReaderForRange(handle, startMapIndex, endMapIndex, startPartition, endPartition, context,
-metrics)
+self.getReaderForRange(handle, startMapIndex, endMapIndex, startPartition, endPartition,
+context, metrics)
}

def getReader[K, C](
@@ -52,8 +52,6 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.execution.exchange.{ReusedExchangeExec, ShuffleExchangeExec}
import org.apache.spark.sql.execution.joins._
import org.apache.spark.sql.execution.python._
-import org.apache.spark.sql.rapids.execution.TrampolineUtil
-import org.apache.spark.sql.rapids.execution.python.shims.spark301db._
import org.apache.spark.sql.execution.window.WindowExecBase
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.rapids.{GpuFileSourceScanExec, GpuStringReplace, GpuTimeSub, ShuffleManagerShimBase}
@@ -39,8 +39,8 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.execution.python.PythonUDFRunner
-import org.apache.spark.sql.rapids.execution.python._
import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.rapids.execution.python._
import org.apache.spark.sql.types._
import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.ColumnarBatch
@@ -34,8 +34,8 @@ import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.physical.{AllTuples, ClusteredDistribution, Distribution, Partitioning}
import org.apache.spark.sql.execution.UnaryExecNode
import org.apache.spark.sql.execution.python._
-import org.apache.spark.sql.rapids.execution.python._
import org.apache.spark.sql.rapids.GpuAggregateExpression
+import org.apache.spark.sql.rapids.execution.python._
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.ColumnarBatch
4 changes: 0 additions & 4 deletions shims/spark302/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark303/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark304/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark311/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark311cdh/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
@@ -28,9 +28,11 @@ class GpuRunningWindowExecMeta(runningWindowFunctionExec: RunningWindowFunctionE
conf: RapidsConf,
parent: Option[RapidsMeta[_, _, _]],
rule: DataFromReplacementRule)
-extends GpuBaseWindowExecMeta[RunningWindowFunctionExec](runningWindowFunctionExec, conf, parent, rule) {
+extends GpuBaseWindowExecMeta[RunningWindowFunctionExec](runningWindowFunctionExec, conf,
+parent, rule) {

-override def getInputWindowExpressions: Seq[NamedExpression] = runningWindowFunctionExec.windowExpressionList
+override def getInputWindowExpressions: Seq[NamedExpression] =
+runningWindowFunctionExec.windowExpressionList
override def getPartitionSpecs: Seq[Expression] = runningWindowFunctionExec.partitionSpec
override def getOrderSpecs: Seq[SortOrder] = runningWindowFunctionExec.orderSpec
override def getResultColumnsOnly: Boolean = true
@@ -21,6 +21,7 @@ import java.nio.ByteBuffer

import com.databricks.sql.execution.window.RunningWindowFunctionExec
import com.nvidia.spark.rapids._
+import com.nvidia.spark.rapids.shims.spark311db._
import com.nvidia.spark.rapids.shims.v2.Spark30XShims
import org.apache.arrow.memory.ReferenceManager
import org.apache.arrow.vector.ValueVector
@@ -51,12 +52,12 @@ import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, ReusedExchangeExec, ShuffleExchangeExec}
import org.apache.spark.sql.execution.joins._
import org.apache.spark.sql.execution.python._
-import org.apache.spark.sql.rapids.execution.python.shims.spark311db._
import org.apache.spark.sql.execution.window.WindowExecBase
import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.rapids._
import org.apache.spark.sql.rapids.execution.{GpuBroadcastExchangeExecBase, GpuBroadcastNestedLoopJoinExecBase, GpuShuffleExchangeExecBase, JoinTypeChecks}
import org.apache.spark.sql.rapids.execution.python.{GpuAggregateInPandasExecMeta, GpuArrowEvalPythonExec, GpuMapInPandasExecMeta, GpuPythonUDF}
+import org.apache.spark.sql.rapids.execution.python.shims.spark311db._
import org.apache.spark.sql.rapids.shims.spark311db._
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types._
@@ -46,7 +46,6 @@ import org.apache.spark.sql.types._
import org.apache.spark.sql.util.ArrowUtils
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.util.Utils
-import org.apache.spark.sql.rapids.execution.python.GpuPythonUDF

/**
* Group Map UDF specific serializer for Databricks because they have a special GroupUDFSerializer.
@@ -72,7 +71,8 @@ class GpuGroupUDFArrowPythonRunner(
onDataWriteFinished: () => Unit,
override val pythonOutSchema: StructType,
minReadTargetBatchSize: Int)
-extends GpuArrowPythonRunner(funcs, evalType, argOffsets, pythonInSchema, timeZoneId, conf, batchSize, onDataWriteFinished, pythonOutSchema, minReadTargetBatchSize) {
+extends GpuArrowPythonRunner(funcs, evalType, argOffsets, pythonInSchema, timeZoneId, conf,
+batchSize, onDataWriteFinished, pythonOutSchema, minReadTargetBatchSize) {

protected override def newWriterThread(
env: SparkEnv,
@@ -18,7 +18,6 @@ package org.apache.spark.sql.rapids.shims.spark311db
import scala.concurrent.Future

import org.apache.spark.MapOutputStatistics
-
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.plans.logical.Statistics
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
4 changes: 0 additions & 4 deletions shims/spark312/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark313/pom.xml
@@ -57,10 +57,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
4 changes: 0 additions & 4 deletions shims/spark320/pom.xml
@@ -58,10 +58,6 @@
</execution>
</executions>
</plugin>
-<plugin>
-<groupId>org.scalastyle</groupId>
-<artifactId>scalastyle-maven-plugin</artifactId>
-</plugin>
</plugins>

<resources>
…