Skip to content

Commit

Permalink
fixed failing tests (NVIDIA#840)
Browse files: browse the repository at this point in the history.
Signed-off-by: Raza Jafri <rjafri@nvidia.com>

Co-authored-by: Raza Jafri <rjafri@nvidia.com>
  • Loading branch information
razajafri authored Sep 25, 2020
1 parent 3863215 commit 36d478c
Showing 1 changed file with 11 additions and 4 deletions.
15 changes: 11 additions & 4 deletions integration_tests/src/main/python/cache_test.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -147,10 +147,13 @@ def do_join(spark):
# This issue is tracked by https://github.com/NVIDIA/spark-rapids/issues/133 in the plugin
DateGen(start=date(1582, 10, 15)),
TimestampGen()]
parquet_ts_write_options = ['INT96', 'TIMESTAMP_MICROS', 'TIMESTAMP_MILLIS']

@pytest.mark.parametrize('ts_write', parquet_ts_write_options)
@pytest.mark.parametrize('ts_rebase', ['CORRECTED', 'LEGACY'])
@pytest.mark.parametrize('data_gen', all_gen_restricting_dates, ids=idfn)
@allow_non_gpu('DataWritingCommandExec')
def test_cache_posexplode_makearray(spark_tmp_path, data_gen):
def test_cache_posexplode_makearray(spark_tmp_path, data_gen, ts_rebase, ts_write):
if is_spark_300() and data_gen.data_type == BooleanType():
pytest.xfail("https://issues.apache.org/jira/browse/SPARK-32672")
data_path_cpu = spark_tmp_path + '/PARQUET_DATA_CPU'
Expand All @@ -162,8 +165,12 @@ def posExplode(spark):
cached.write.parquet(data_path)
spark.read.parquet(data_path)
return posExplode
from_cpu = with_cpu_session(write_posExplode(data_path_cpu))
from_gpu = with_gpu_session(write_posExplode(data_path_gpu))
from_cpu = with_cpu_session(write_posExplode(data_path_cpu),
conf={'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase,
'spark.sql.parquet.outputTimestampType': ts_write})
from_gpu = with_gpu_session(write_posExplode(data_path_gpu),
conf={'spark.sql.legacy.parquet.datetimeRebaseModeInWrite': ts_rebase,
'spark.sql.parquet.outputTimestampType': ts_write})
assert_equal(from_cpu, from_gpu)

@pytest.mark.parametrize('data_gen', all_gen, ids=idfn)
Expand All @@ -174,7 +181,7 @@ def op_df(spark, length=2048, seed=0):
('a', data_gen),
('b', IntegerGen())], nullable=False), length=length, seed=seed).cache()
cached.count() # populate the cache
return cached.rollup(f.col("a"), f.col("b")).agg(f.count(f.col("b")))
return cached.rollup(f.col("a"), f.col("b")).agg(f.col("b"))

assert_gpu_and_cpu_are_equal_collect(op_df)

0 comments on commit 36d478c

Please sign in to comment.