Merge branch 'branch-24.02' into GpuSem_try_acquire
revans2 committed Jan 30, 2024
2 parents a293120 + 7e48cc9 commit 7311496
Showing 2 changed files with 15 additions and 17 deletions.
15 changes: 11 additions & 4 deletions jenkins/spark-tests.sh
@@ -304,10 +304,17 @@ if [[ $TEST_MODE == "DEFAULT" ]]; then
PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
./run_pyspark_from_build.sh

-SPARK_SHELL_SMOKE_TEST=1 \
-PYSP_TEST_spark_jars_packages=com.nvidia:rapids-4-spark_${SCALA_BINARY_VER}:${PROJECT_VER} \
-PYSP_TEST_spark_jars_repositories=${PROJECT_REPO} \
-./run_pyspark_from_build.sh
+# '--packages' does not support classifiers, so it can only fetch the default
+# (cuda11) jar; see https://issues.apache.org/jira/browse/SPARK-20075
+# "$CLASSIFIER" == "" is usually the case when developers run the tests,
+# while "$CLASSIFIER" == "cuda11" is the case on CI.
+# We expect the packages test to run in both cases.
+if [[ "$CLASSIFIER" == "" || "$CLASSIFIER" == "cuda11" ]]; then
+  SPARK_SHELL_SMOKE_TEST=1 \
+  PYSP_TEST_spark_jars_packages=com.nvidia:rapids-4-spark_${SCALA_BINARY_VER}:${PROJECT_VER} \
+  PYSP_TEST_spark_jars_repositories=${PROJECT_REPO} \
+  ./run_pyspark_from_build.sh
+fi

# ParquetCachedBatchSerializer cache_test
PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \
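A note on the gating added above: '--packages' resolves the plugin by its Maven coordinates, and the coordinate syntax has no field for a classifier (SPARK-20075), so it can only fetch the default artifact, which is the cuda11 build. A minimal Scala sketch of the two jar-delivery styles; the coordinates, version, and path are illustrative placeholders, not values taken from this commit:

    import org.apache.spark.sql.SparkSession

    // Coordinate-based resolution, as the smoke test exercises: there is no
    // way to name a classifier here, so only the default (cuda11) jar works.
    val byPackages = SparkSession.builder()
      .master("local[*]")
      .config("spark.jars.packages", "com.nvidia:rapids-4-spark_2.12:24.02.0")

    // A classifier build has to be shipped as an explicit jar path instead.
    val byClassifierJar = SparkSession.builder()
      .master("local[*]")
      .config("spark.jars", "/path/to/rapids-4-spark_2.12-24.02.0-cuda11.jar")

Calling getOrCreate() on either builder yields the session. This is why the smoke test only runs when "$CLASSIFIER" is empty (developer runs) or "cuda11" (CI runs against the default build).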
@@ -653,15 +653,6 @@ case class GpuExtractChunk32(
override def children: Seq[Expression] = Seq(data)
}

-object GpuExtractChunk32 {
-  /** Build the aggregate expressions for summing the four 32-bit chunks of a 128-bit decimal. */
-  def chunkSumExprs(): Seq[CudfAggregate] = (0 until 4).map { i =>
-    // first three chunk columns are UINT32, so they are promoted to UINT64 during aggregation.
-    val colType = if (i < 3) GpuUnsignedLongType else LongType
-    new CudfSum(colType)
-  }
-}

/**
* Reassembles a 128-bit value from four separate 64-bit sum results
* @param chunkAttrs attributes for the four 64-bit sum chunks ordered from least significant to
@@ -1100,7 +1091,7 @@ case class GpuDecimal128Sum(
chunks :+ GpuIsNull(child)
}

-  private lazy val updateSumChunks = GpuExtractChunk32.chunkSumExprs
+  private lazy val updateSumChunks = (0 until 4).map(_ => new CudfSum(LongType))

override lazy val updateAggregates: Seq[CudfAggregate] = updateSumChunks :+ updateIsEmpty

@@ -1119,7 +1110,7 @@ case class GpuDecimal128Sum(
chunks ++ Seq(isEmpty, GpuIsNull(sum))
}

-  private lazy val mergeSumChunks = GpuExtractChunk32.chunkSumExprs()
+  private lazy val mergeSumChunks = (0 until 4).map(_ => new CudfSum(LongType))

// To be able to do decimal overflow detection, we need a CudfSum that does **not** ignore nulls.
// Cudf does not have such an aggregation, so for merge we have to work around that similar to
@@ -1484,7 +1475,7 @@ case class GpuDecimal128Average(child: Expression, dt: DecimalType)
chunks :+ forCount
}

-  private lazy val updateSumChunks = GpuExtractChunk32.chunkSumExprs()
+  private lazy val updateSumChunks = (0 until 4).map(_ => new CudfSum(LongType))

override lazy val updateAggregates: Seq[CudfAggregate] = updateSumChunks :+ updateCount

@@ -1502,7 +1493,7 @@ case class GpuDecimal128Average(child: Expression, dt: DecimalType)
chunks ++ Seq(count, GpuIsNull(sum))
}

-  private lazy val mergeSumChunks = GpuExtractChunk32.chunkSumExprs()
+  private lazy val mergeSumChunks = (0 until 4).map(_ => new CudfSum(LongType))

override lazy val mergeAggregates: Seq[CudfAggregate] =
mergeSumChunks ++ Seq(mergeCount, mergeIsOverflow)
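The Scala changes above all lean on one technique: a 128-bit decimal sum is carried out by extracting four 32-bit chunks from each value (GpuExtractChunk32), summing each chunk column with a 64-bit CudfSum, and reassembling the 128-bit result from the four chunk sums, as the doc comment above describes. A minimal CPU-side sketch of the idea, illustrative only and ignoring the sign handling and overflow detection the real aggregation has to perform:

    object ChunkedSum128 {
      private val Mask32 = (BigInt(1) << 32) - 1

      // Split a 128-bit value into four 32-bit chunks, least significant
      // first, matching the chunk ordering in the diff's doc comment.
      def extractChunks(v: BigInt): Seq[Long] =
        (0 until 4).map(i => ((v >> (32 * i)) & Mask32).toLong)

      // Shift each 64-bit chunk sum back into position; the shifted adds
      // propagate carries between chunks implicitly.
      def reassemble(chunkSums: Seq[Long]): BigInt =
        chunkSums.zipWithIndex.map { case (s, i) => BigInt(s) << (32 * i) }.sum

      // Sum non-negative 128-bit values using only 64-bit accumulation,
      // which is what the four CudfSum aggregates do on the GPU.
      def sum(values: Seq[BigInt]): BigInt =
        reassemble(values.map(extractChunks).transpose.map(_.sum))
    }

For instance, ChunkedSum128.sum(Seq(BigInt(1) << 100, BigInt(5))) returns (BigInt(1) << 100) + 5. Each 32-bit chunk sum fits in a 64-bit accumulator for roughly 2^31 additions, which is why the deleted helper promoted the UINT32 chunk columns to UINT64 during aggregation.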
