From 31b168e440df1aecd32479b310690af87c0ac0ee Mon Sep 17 00:00:00 2001 From: Liangcai Li Date: Mon, 24 May 2021 13:10:38 +0800 Subject: [PATCH] Fixing the failing test `test_window` on DB (#2484) The fix makes 'outReferences' lazy, because on Databricks binding the references on the driver side yields invalid expressions in the 'projectList', e.g. 'none#0L', 'none@1L'. Signed-off-by: Firestarman --- integration_tests/src/main/python/udf_cudf_test.py | 4 +--- .../spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala | 2 +- .../python/shims/spark310db/GpuWindowInPandasExec.scala | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/integration_tests/src/main/python/udf_cudf_test.py b/integration_tests/src/main/python/udf_cudf_test.py index 802ff9184dd..7fbf76664a8 100644 --- a/integration_tests/src/main/python/udf_cudf_test.py +++ b/integration_tests/src/main/python/udf_cudf_test.py @@ -14,7 +14,7 @@ import pytest -from conftest import is_at_least_precommit_run, is_databricks_runtime +from conftest import is_at_least_precommit_run from pyspark.sql.pandas.utils import require_minimum_pyarrow_version, require_minimum_pandas_version try: @@ -275,8 +275,6 @@ def gpu_run(spark): # ======= Test Window In Pandas ======= @cudf_udf -@pytest.mark.xfail(condition=is_databricks_runtime(), - reason='https://github.com/NVIDIA/spark-rapids/issues/2372') def test_window(enable_cudf_udf): @pandas_udf("int") def _sum_cpu_func(v: pd.Series) -> int: diff --git a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala index fe2df6ae9ad..3bc3f31e897 100644 --- a/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala +++ b/shims/spark301db/src/main/scala/com/nvidia/spark/rapids/shims/spark301db/GpuWindowInPandasExec.scala @@ -54,7 +54,7 @@ case class 
GpuWindowInPandasExec( } } - private val outReferences = { + private lazy val outReferences = { val allExpressions = windowFramesWithExpressions.map(_._2).flatten val references = allExpressions.zipWithIndex.map { case (e, i) => // Results of window expressions will be on the right side of child's output diff --git a/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala b/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala index b4eb99c7078..0adf2faea7f 100644 --- a/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala +++ b/shims/spark310db/src/main/scala/org/apache/spark/sql/rapids/execution/python/shims/spark310db/GpuWindowInPandasExec.scala @@ -64,7 +64,7 @@ case class GpuWindowInPandasExec( } } - private val outReferences = { + private lazy val outReferences = { val allExpressions = windowFramesWithExpressions.map(_._2).flatten val references = allExpressions.zipWithIndex.map { case (e, i) => // Results of window expressions will be on the right side of child's output