You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[2022-05-13T04:29:12.934Z] ___________________ test_parquet_check_schema_compatibility ____________________
[2022-05-13T04:29:12.934Z] [gw2] linux -- Python 3.8.13 /databricks/conda/envs/cudf-udf/bin/python
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] spark_tmp_path = '/tmp/pyspark_tests//0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/'
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] def test_parquet_check_schema_compatibility(spark_tmp_path):
[2022-05-13T04:29:12.934Z] data_path = spark_tmp_path + '/PARQUET_DATA'
[2022-05-13T04:29:12.934Z] gen_list = [('int', int_gen), ('long', long_gen), ('dec32', decimal_gen_32bit)]
[2022-05-13T04:29:12.934Z] with_cpu_session(lambda spark: gen_df(spark, gen_list).coalesce(1).write.parquet(data_path))
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] read_int_as_long = StructType(
[2022-05-13T04:29:12.934Z] [StructField('long', LongType()), StructField('int', LongType())])
[2022-05-13T04:29:12.934Z] > assert_gpu_and_cpu_error(
[2022-05-13T04:29:12.934Z] lambda spark: spark.read.schema(read_int_as_long).parquet(data_path).collect(),
[2022-05-13T04:29:12.934Z] conf={},
[2022-05-13T04:29:12.934Z] error_message='Parquet column cannot be converted in')
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] ../../src/main/python/parquet_test.py:878:
[2022-05-13T04:29:12.934Z] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[2022-05-13T04:29:12.934Z] ../../src/main/python/asserts.py:572: in assert_gpu_and_cpu_error
[2022-05-13T04:29:12.934Z] assert_py4j_exception(lambda: with_cpu_session(df_fun, conf), error_message)
[2022-05-13T04:29:12.934Z] _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] func = <function assert_gpu_and_cpu_error.<locals>.<lambda> at 0x7f37a5e7cc10>
[2022-05-13T04:29:12.934Z] error_message = 'Parquet column cannot be converted in'
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] def assert_py4j_exception(func, error_message):
[2022-05-13T04:29:12.934Z] """
[2022-05-13T04:29:12.934Z] Assert that a specific Java exception is thrown
[2022-05-13T04:29:12.934Z] :param func: a function to be verified
[2022-05-13T04:29:12.934Z] :param error_message: a string such as the one produce by java.lang.Exception.toString
[2022-05-13T04:29:12.934Z] :return: Assertion failure if no exception matching error_message has occurred.
[2022-05-13T04:29:12.934Z] """
[2022-05-13T04:29:12.934Z] with pytest.raises(Py4JJavaError) as py4jError:
[2022-05-13T04:29:12.934Z] func()
[2022-05-13T04:29:12.934Z] > assert error_message in str(py4jError.value.java_exception)
[2022-05-13T04:29:12.934Z] E       AssertionError
[2022-05-13T04:29:12.934Z]
[2022-05-13T04:29:12.934Z] ../../src/main/python/asserts.py:561: AssertionError
[2022-05-13T04:29:12.934Z] ----------------------------- Captured stderr call -----------------------------
[2022-05-13T04:29:12.934Z] 22/05/13 04:03:47 ERROR FileScanRDD: Error while reading file file:/tmp/pyspark_tests/0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/PARQUET_DATA/part-00000-tid-4751444801957620801-e47b08e7-db20-487f-8980-d7e31862c6ae-31197-1-c000.snappy.parquet. Parquet column cannot be converted. Column: [int], Expected: LongType, Found: INT32
[2022-05-13T04:29:12.934Z] 22/05/13 04:03:47 ERROR FileScanRDD: Error in async I/O
[2022-05-13T04:29:12.934Z] org.apache.spark.SparkException: Exception thrown in awaitResult:
[2022-05-13T04:29:12.934Z] at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:428)
[2022-05-13T04:29:12.934Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:449)
[2022-05-13T04:29:12.934Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$hasNext$1(FileScanRDD.scala:302)
[2022-05-13T04:29:12.934Z] at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.java:23)
[2022-05-13T04:29:12.934Z] at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:297)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:845)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:757)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.collect.UnsafeRowBatchUtils$.encodeUnsafeRows(UnsafeRowBatchUtils.scala:80)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.collect.Collector.$anonfun$processFunc$1(Collector.scala:178)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$3(ResultTask.scala:75)
[2022-05-13T04:29:12.935Z] at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.ResultTask.$anonfun$runTask$1(ResultTask.scala:75)
[2022-05-13T04:29:12.935Z] at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:55)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.Task.doRunTask(Task.scala:150)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.Task.$anonfun$run$1(Task.scala:119)
[2022-05-13T04:29:12.935Z] at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
[2022-05-13T04:29:12.935Z] at org.apache.spark.scheduler.Task.run(Task.scala:91)
[2022-05-13T04:29:12.935Z] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$13(Executor.scala:813)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1646)
[2022-05-13T04:29:12.935Z] at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:816)
[2022-05-13T04:29:12.935Z] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[2022-05-13T04:29:12.935Z] at com.databricks.spark.util.ExecutorFrameProfiler$.record(ExecutorFrameProfiler.scala:110)
[2022-05-13T04:29:12.935Z] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:672)
[2022-05-13T04:29:12.935Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2022-05-13T04:29:12.935Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2022-05-13T04:29:12.935Z] at java.lang.Thread.run(Thread.java:748)
[2022-05-13T04:29:12.935Z] Caused by: com.databricks.sql.io.FileReadException: Error while reading file file:/tmp/pyspark_tests/0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/PARQUET_DATA/part-00000-tid-4751444801957620801-e47b08e7-db20-487f-8980-d7e31862c6ae-31197-1-c000.snappy.parquet. Parquet column cannot be converted. Column: [int], Expected: LongType, Found: INT32
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:391)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:361)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:564)
[2022-05-13T04:29:12.935Z] at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
[2022-05-13T04:29:12.935Z] at scala.util.Success.$anonfun$map$1(Try.scala:255)
[2022-05-13T04:29:12.935Z] at scala.util.Success.map(Try.scala:213)
[2022-05-13T04:29:12.935Z] at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
[2022-05-13T04:29:12.935Z] at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
[2022-05-13T04:29:12.935Z] at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
[2022-05-13T04:29:12.935Z] at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.935Z] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
[2022-05-13T04:29:12.935Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.935Z] ... 3 more
[2022-05-13T04:29:12.935Z] Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
[2022-05-13T04:29:12.935Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:448)
[2022-05-13T04:29:12.935Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:693)
[2022-05-13T04:29:12.935Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:391)
[2022-05-13T04:29:12.935Z] at com.databricks.sql.io.parquet.DatabricksVectorizedParquetRecordReader.nextBatch(DatabricksVectorizedParquetRecordReader.java:480)
[2022-05-13T04:29:12.935Z] at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:190)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:335)
[2022-05-13T04:29:12.936Z] ... 18 more
[2022-05-13T04:29:12.936Z] 22/05/13 04:03:47 ERROR Executor: Exception in task 0.0 in stage 11596.0 (TID 31198)
[2022-05-13T04:29:12.936Z] com.databricks.sql.io.FileReadException: Error while reading file file:/tmp/pyspark_tests/0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/PARQUET_DATA/part-00000-tid-4751444801957620801-e47b08e7-db20-487f-8980-d7e31862c6ae-31197-1-c000.snappy.parquet. Parquet column cannot be converted. Column: [int], Expected: LongType, Found: INT32
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:391)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:361)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:564)
[2022-05-13T04:29:12.936Z] at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
[2022-05-13T04:29:12.936Z] at scala.util.Success.$anonfun$map$1(Try.scala:255)
[2022-05-13T04:29:12.936Z] at scala.util.Success.map(Try.scala:213)
[2022-05-13T04:29:12.936Z] at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.936Z] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.936Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2022-05-13T04:29:12.936Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2022-05-13T04:29:12.936Z] at java.lang.Thread.run(Thread.java:748)
[2022-05-13T04:29:12.936Z] Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
[2022-05-13T04:29:12.936Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:448)
[2022-05-13T04:29:12.936Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:693)
[2022-05-13T04:29:12.936Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:391)
[2022-05-13T04:29:12.936Z] at com.databricks.sql.io.parquet.DatabricksVectorizedParquetRecordReader.nextBatch(DatabricksVectorizedParquetRecordReader.java:480)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:190)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:335)
[2022-05-13T04:29:12.936Z] ... 18 more
[2022-05-13T04:29:12.936Z] 22/05/13 04:03:47 ERROR FileScanRDD: Error in async I/O
[2022-05-13T04:29:12.936Z] com.databricks.sql.io.FileReadException: Error while reading file file:/tmp/pyspark_tests/0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/PARQUET_DATA/part-00000-tid-4751444801957620801-e47b08e7-db20-487f-8980-d7e31862c6ae-31197-1-c000.snappy.parquet. Parquet column cannot be converted. Column: [int], Expected: LongType, Found: INT32
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:391)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:361)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
[2022-05-13T04:29:12.936Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:564)
[2022-05-13T04:29:12.936Z] at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
[2022-05-13T04:29:12.936Z] at scala.util.Success.$anonfun$map$1(Try.scala:255)
[2022-05-13T04:29:12.936Z] at scala.util.Success.map(Try.scala:213)
[2022-05-13T04:29:12.936Z] at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
[2022-05-13T04:29:12.936Z] at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
[2022-05-13T04:29:12.936Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.936Z] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.937Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2022-05-13T04:29:12.937Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2022-05-13T04:29:12.937Z] at java.lang.Thread.run(Thread.java:748)
[2022-05-13T04:29:12.937Z] Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:448)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:693)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:391)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.DatabricksVectorizedParquetRecordReader.nextBatch(DatabricksVectorizedParquetRecordReader.java:480)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:190)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:335)
[2022-05-13T04:29:12.937Z] ... 18 more
[2022-05-13T04:29:12.937Z] 22/05/13 04:03:47 WARN TaskSetManager: Lost task 0.0 in stage 11596.0 (TID 31198) (10.2.128.4 executor driver): com.databricks.sql.io.FileReadException: Error while reading file file:/tmp/pyspark_tests/0513-023846-8xru1kk3-10-2-128-4-gw2-5446-2128719059/PARQUET_DATA/part-00000-tid-4751444801957620801-e47b08e7-db20-487f-8980-d7e31862c6ae-31197-1-c000.snappy.parquet. Parquet column cannot be converted. Column: [int], Expected: LongType, Found: INT32
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.logFileNameAndThrow(FileScanRDD.scala:391)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:361)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.$anonfun$prepareNextFile$1(FileScanRDD.scala:564)
[2022-05-13T04:29:12.937Z] at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:659)
[2022-05-13T04:29:12.937Z] at scala.util.Success.$anonfun$map$1(Try.scala:255)
[2022-05-13T04:29:12.937Z] at scala.util.Success.map(Try.scala:213)
[2022-05-13T04:29:12.937Z] at scala.concurrent.Future.$anonfun$map$1(Future.scala:292)
[2022-05-13T04:29:12.937Z] at scala.concurrent.impl.Promise.liftedTree1$1(Promise.scala:33)
[2022-05-13T04:29:12.937Z] at scala.concurrent.impl.Promise.$anonfun$transform$1(Promise.scala:33)
[2022-05-13T04:29:12.937Z] at scala.concurrent.impl.CallbackRunnable.run(Promise.scala:64)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.$anonfun$run$1(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.937Z] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:68)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingHelper.runWithCaptured$(SparkThreadLocalForwardingThreadPoolExecutor.scala:54)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.runWithCaptured(SparkThreadLocalForwardingThreadPoolExecutor.scala:101)
[2022-05-13T04:29:12.937Z] at org.apache.spark.util.threads.SparkThreadLocalCapturingRunnable.run(SparkThreadLocalForwardingThreadPoolExecutor.scala:104)
[2022-05-13T04:29:12.937Z] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
[2022-05-13T04:29:12.937Z] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
[2022-05-13T04:29:12.937Z] at java.lang.Thread.run(Thread.java:748)
[2022-05-13T04:29:12.937Z] Caused by: org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.constructConvertNotSupportedException(VectorizedColumnReader.java:448)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readIntBatch(VectorizedColumnReader.java:693)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.VectorizedColumnReader.readBatch(VectorizedColumnReader.java:391)
[2022-05-13T04:29:12.937Z] at com.databricks.sql.io.parquet.DatabricksVectorizedParquetRecordReader.nextBatch(DatabricksVectorizedParquetRecordReader.java:480)
[2022-05-13T04:29:12.937Z] at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.nextKeyValue(VectorizedParquetRecordReader.java:190)
[2022-05-13T04:29:12.938Z] at org.apache.spark.sql.execution.datasources.RecordReaderIterator.hasNext(RecordReaderIterator.scala:39)
[2022-05-13T04:29:12.938Z] at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1$$anon$2.getNext(FileScanRDD.scala:335)
[2022-05-13T04:29:12.938Z] ... 18 more
The text was updated successfully, but these errors were encountered:
Describe the bug: related to #5434
The text was updated successfully, but these errors were encountered: