diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py
index 9e2e98006ab..0c877f00238 100644
--- a/integration_tests/src/main/python/date_time_test.py
+++ b/integration_tests/src/main/python/date_time_test.py
@@ -17,7 +17,7 @@
 from conftest import is_utc, is_supported_time_zone, get_test_tz
 from data_gen import *
 from datetime import date, datetime, timezone
-from marks import ignore_order, incompat, allow_non_gpu, datagen_overrides, tz_sensitive_test
+from marks import allow_non_gpu, datagen_overrides, disable_ansi_mode, ignore_order, incompat, tz_sensitive_test
 from pyspark.sql.types import *
 from spark_session import with_cpu_session, is_before_spark_330, is_before_spark_350
 import pyspark.sql.functions as f
@@ -91,6 +91,8 @@ def fun(spark):
     assert_gpu_and_cpu_are_equal_collect(fun)
 
+
+@disable_ansi_mode # ANSI mode tested separately.
 # Should specify `spark.sql.legacy.interval.enabled` to test `DateAddInterval` after Spark 3.2.0,
 # refer to https://issues.apache.org/jira/browse/SPARK-34896
 # [SPARK-34896][SQL] Return day-time interval from dates subtraction
@@ -437,6 +439,8 @@ def test_string_unix_timestamp_ansi_exception():
         error_message="Exception",
         conf=ansi_enabled_conf)
 
+
+@disable_ansi_mode # ANSI mode is tested separately.
 @tz_sensitive_test
 @pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported")
 @pytest.mark.parametrize('parser_policy', ["CORRECTED", "EXCEPTION"], ids=idfn)
@@ -561,6 +565,8 @@ def test_date_format_maybe_incompat(data_gen, date_format):
     assert_gpu_and_cpu_are_equal_collect(
         lambda spark : unary_op_df(spark, data_gen).selectExpr("date_format(a, '{}')".format(date_format)), conf)
 
+
+@disable_ansi_mode # ANSI mode tested separately.
 # Reproduce conditions for https://github.com/NVIDIA/spark-rapids/issues/5670
 # where we had a failure due to GpuCast canonicalization with timezone.
 # In this case it was doing filter after project, the way I get that to happen is by adding in the
@@ -594,6 +600,7 @@ def test_unsupported_fallback_date_format(data_gen):
         conf)
 
 
+@disable_ansi_mode # Failure cases for ANSI mode are tested separately.
 @allow_non_gpu('ProjectExec')
 def test_unsupported_fallback_to_date():
     date_gen = StringGen(pattern="2023-08-01")
diff --git a/integration_tests/src/main/python/dpp_test.py b/integration_tests/src/main/python/dpp_test.py
index cd4610cf95c..b362a4175f3 100644
--- a/integration_tests/src/main/python/dpp_test.py
+++ b/integration_tests/src/main/python/dpp_test.py
@@ -19,7 +19,7 @@
 from asserts import assert_cpu_and_gpu_are_equal_collect_with_capture, assert_gpu_and_cpu_are_equal_collect
 from conftest import spark_tmp_table_factory
 from data_gen import *
-from marks import ignore_order, allow_non_gpu, datagen_overrides
+from marks import ignore_order, allow_non_gpu, datagen_overrides, disable_ansi_mode
 from spark_session import is_before_spark_320, with_cpu_session, is_before_spark_312, is_databricks_runtime, is_databricks113_or_later
 
 # non-positive values here can produce a degenerative join, so here we ensure that most values are
@@ -167,7 +167,7 @@ def fn(spark):
     '''
 ]
 
-
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # When BroadcastExchangeExec is available on filtering side, and it can be reused:
 # DynamicPruningExpression(InSubqueryExec(value, GpuSubqueryBroadcastExec)))
 @ignore_order
@@ -198,6 +198,7 @@ def test_dpp_reuse_broadcast_exchange(spark_tmp_table_factory, store_format, s_i
         conf=dict(_exchange_reuse_conf + [('spark.sql.adaptive.enabled', aqe_enabled)]))
 
 
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # The SubqueryBroadcast can work on GPU even if the scan who holds it fallbacks into CPU.
 @ignore_order
 @pytest.mark.allow_non_gpu('FileSourceScanExec')
@@ -215,6 +216,7 @@ def test_dpp_reuse_broadcast_exchange_cpu_scan(spark_tmp_table_factory):
             ('spark.rapids.sql.format.parquet.read.enabled', 'false')]))
 
 
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # When BroadcastExchange is not available and non-broadcast DPPs are forbidden, Spark will bypass it:
 # DynamicPruningExpression(Literal.TrueLiteral)
 @ignore_order
@@ -238,6 +240,7 @@ def test_dpp_bypass(spark_tmp_table_factory, store_format, s_index, aqe_enabled)
         conf=dict(_bypass_conf + [('spark.sql.adaptive.enabled', aqe_enabled)]))
 
 
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # When BroadcastExchange is not available, but it is still worthwhile to run DPP,
 # then Spark will plan an extra Aggregate to collect filtering values:
 # DynamicPruningExpression(InSubqueryExec(value, SubqueryExec(Aggregate(...))))
@@ -261,6 +264,7 @@ def test_dpp_via_aggregate_subquery(spark_tmp_table_factory, store_format, s_ind
         conf=dict(_no_exchange_reuse_conf + [('spark.sql.adaptive.enabled', aqe_enabled)]))
 
 
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # When BroadcastExchange is not available, Spark will skip DPP if there is no potential benefit
 @ignore_order
 @pytest.mark.parametrize('store_format', ['parquet', 'orc'], ids=idfn)
@@ -321,6 +325,8 @@ def create_dim_table_for_like(spark):
         exist_classes,
         conf=dict(_exchange_reuse_conf + [('spark.sql.adaptive.enabled', aqe_enabled)]))
 
+
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # Test handling DPP expressions from a HashedRelation that rearranges columns
 @pytest.mark.parametrize('aqe_enabled', [
     'false',
@@ -351,6 +357,8 @@ def setup_tables(spark):
             ("spark.rapids.sql.castStringToTimestamp.enabled", "true"),
             ("spark.rapids.sql.hasExtendedYearValues", "false")]))
 
+
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # Test handling DPP subquery that could broadcast EmptyRelation rather than a GPU serialized batch
 @pytest.mark.parametrize('aqe_enabled', [
     'false',
diff --git a/integration_tests/src/main/python/grouping_sets_test.py b/integration_tests/src/main/python/grouping_sets_test.py
index 24f8dd1810c..0d1593916d3 100644
--- a/integration_tests/src/main/python/grouping_sets_test.py
+++ b/integration_tests/src/main/python/grouping_sets_test.py
@@ -41,6 +41,8 @@
     'GROUP BY a, GROUPING SETS((a, b), (a), (), (a, b), (a), (b), ())',
 ]
 
+
+@disable_ansi_mode # https://github.com/NVIDIA/spark-rapids/issues/5114
 # test nested syntax of grouping set, rollup and cube
 @ignore_order
 @pytest.mark.parametrize('data_gen', [_grouping_set_gen], ids=idfn)
diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py
index d5e49d5eb65..55506f6c60c 100644
--- a/integration_tests/src/main/python/map_test.py
+++ b/integration_tests/src/main/python/map_test.py
@@ -18,7 +18,7 @@
 from conftest import is_not_utc
 from data_gen import *
 from conftest import is_databricks_runtime
-from marks import allow_non_gpu, ignore_order, datagen_overrides
+from marks import allow_non_gpu, datagen_overrides, disable_ansi_mode, ignore_order
 from spark_session import *
 from pyspark.sql.functions import create_map, col, lit, row_number
 from pyspark.sql.types import *
@@ -138,6 +138,7 @@ def test_get_map_value_string_keys(data_gen):
                         for key in numeric_key_gens for value in get_map_value_gens()]
 
 
+@disable_ansi_mode # ANSI mode failures are tested separately.
 @pytest.mark.parametrize('data_gen', numeric_key_map_gens, ids=idfn)
 def test_get_map_value_numeric_keys(data_gen):
     key_gen = data_gen._key_gen
@@ -151,6 +152,7 @@ def test_get_map_value_numeric_keys(data_gen):
             'a[999]'))
 
 
+@disable_ansi_mode # ANSI mode failures are tested separately.
 @pytest.mark.parametrize('data_gen', supported_key_map_gens, ids=idfn)
 @allow_non_gpu(*non_utc_allow)
 def test_get_map_value_supported_keys(data_gen):
@@ -174,6 +176,7 @@ def test_get_map_value_fallback_keys(data_gen):
         cpu_fallback_class_name="GetMapValue")
 
 
+@disable_ansi_mode # ANSI mode failures are tested separately.
 @pytest.mark.parametrize('key_gen', numeric_key_gens, ids=idfn)
 def test_basic_scalar_map_get_map_value(key_gen):
     def query_map_scalar(spark):
@@ -639,6 +642,8 @@ def test_map_element_at_ansi_null(data_gen):
             'element_at(a, "NOT_FOUND")'),
         conf=ansi_enabled_conf)
 
+
+@disable_ansi_mode # ANSI mode failures are tested separately.
 @pytest.mark.parametrize('data_gen', map_gens_sample, ids=idfn)
 @allow_non_gpu(*non_utc_allow)
 def test_transform_values(data_gen):
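
Reviewer note: the hunks above only import and apply the `disable_ansi_mode` marker; its definition lives in `marks.py`/`conftest.py`, which this diff does not touch. As a rough, hypothetical sketch of how such a pytest marker is typically registered and honored (the `--force_ansi_mode` option, the skip behavior, and the file layout here are assumptions for illustration, not the actual spark-rapids wiring):

# Hypothetical sketch only -- the real implementation is in
# integration_tests/src/main/python/marks.py and conftest.py and may differ.
import pytest

# marks.py would re-export the marker so test modules can write @disable_ansi_mode.
disable_ansi_mode = pytest.mark.disable_ansi_mode


# conftest.py would register the marker and react to it during collection.
def pytest_configure(config):
    config.addinivalue_line(
        "markers",
        "disable_ansi_mode: this test is not expected to pass with spark.sql.ansi.enabled=true")


def pytest_collection_modifyitems(config, items):
    # Assume a hypothetical --force_ansi_mode flag that runs the suite with ANSI on.
    ansi_forced = config.getoption("--force_ansi_mode", default=False)
    for item in items:
        if ansi_forced and item.get_closest_marker("disable_ansi_mode"):
            item.add_marker(pytest.mark.skip(reason="ANSI mode failures are tested separately"))

With wiring along these lines, the `@disable_ansi_mode` annotations added in this patch keep the marked tests out of an ANSI-enabled run, while the dedicated ANSI tests continue to cover the failure paths.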