diff --git a/integration_tests/src/main/python/map_test.py b/integration_tests/src/main/python/map_test.py index aec4fd1f256..c797a017371 100644 --- a/integration_tests/src/main/python/map_test.py +++ b/integration_tests/src/main/python/map_test.py @@ -135,6 +135,12 @@ def test_get_map_value_timestamp_keys(data_gen): 'a[timestamp "2022-01-01"]', 'a[null]')) +def test_map_side_effects(): + assert_gpu_and_cpu_are_equal_collect( + lambda spark : spark.range(10).selectExpr( + 'id', + 'if(id == 0, null, map(id, id, id DIV 2, id)) as m'), + conf={'spark.sql.mapKeyDedupPolicy': 'EXCEPTION'}) @pytest.mark.parametrize('key_gen', [StringGen(nullable=False), IntegerGen(nullable=False), basic_struct_gen], ids=idfn) @pytest.mark.parametrize('value_gen', [StringGen(nullable=True), IntegerGen(nullable=True), basic_struct_gen], ids=idfn) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeCreator.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeCreator.scala index ce4c673442c..68d47472263 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeCreator.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/complexTypeCreator.scala @@ -99,6 +99,9 @@ case class GpuCreateMap( } } + override lazy val hasSideEffects: Boolean = + GpuCreateMap.exceptionOnDupKeys || super.hasSideEffects + override def columnarEval(batch: ColumnarBatch): Any = { withResource(new Array[ColumnVector](children.size)) { columns => val numRows = batch.numRows() @@ -135,11 +138,14 @@ object GpuCreateMap extends Arm { SQLConf.get.getConf(SQLConf.LEGACY_CREATE_EMPTY_COLLECTION_USING_STRING_TYPE)) } + def exceptionOnDupKeys: Boolean = + SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY) == + SQLConf.MapKeyDedupPolicy.EXCEPTION.toString + def createMapFromKeysValuesAsStructs(dataType: MapType, listsOfKeyValueStructs : ColumnView): GpuColumnVector = { withResource(listsOfKeyValueStructs.dropListDuplicatesWithKeysValues()) { deduped => - if (SQLConf.get.getConf(SQLConf.MAP_KEY_DEDUP_POLICY) == - SQLConf.MapKeyDedupPolicy.EXCEPTION.toString) { + if (exceptionOnDupKeys) { // Compare child data row count before and after removing duplicates to determine // if there were duplicates. withResource(deduped.getChildColumnView(0)) { a =>