diff --git a/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala b/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala index 543cb7c9afd..b5bb20f6f4a 100644 --- a/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala +++ b/sql-plugin/src/main/301until320-all/scala/com/nvidia/spark/rapids/shims/v2/OrcShims301until320Base.scala @@ -20,7 +20,7 @@ import scala.collection.mutable.ArrayBuffer import com.nvidia.spark.rapids.OrcOutputStripe import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.common.io.DiskRangeList -import org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation} +import org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation, TypeDescription} import org.apache.orc.impl.{DataReaderProperties, OutStream, SchemaEvolution} import org.apache.orc.impl.RecordReaderImpl.SargApplier @@ -84,4 +84,11 @@ trait OrcShims301until320Base { } result } + + /** + * Checks whether two TypeDescriptions are equal using the plain `lhs.equals(rhs)`. + */ + def typeDescriptionEqual(lhs: TypeDescription, rhs: TypeDescription): Boolean = { + lhs.equals(rhs) + } } diff --git a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala index 49dd18424fd..c1ad560089b 100644 --- a/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala +++ b/sql-plugin/src/main/320+/scala/com/nvidia/spark/rapids/shims/v2/OrcShims.scala @@ -21,7 +21,7 @@ import com.nvidia.spark.rapids.OrcOutputStripe import com.nvidia.spark.rapids.RapidsPluginImplicits._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.hive.common.io.DiskRangeList -import 
org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcConf, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation} +import org.apache.orc.{CompressionCodec, CompressionKind, DataReader, OrcConf, OrcFile, OrcProto, PhysicalWriter, Reader, StripeInformation, TypeDescription} import org.apache.orc.impl.{BufferChunk, BufferChunkList, DataReaderProperties, InStream, OrcCodecPool, OutStream, ReaderImpl, SchemaEvolution} import org.apache.orc.impl.RecordReaderImpl.SargApplier import org.apache.orc.impl.reader.StripePlanner @@ -120,4 +120,11 @@ object OrcShims { result } + + /** + * Checks whether two TypeDescriptions are equal, ignoring attributes (flag passed as false). + */ + def typeDescriptionEqual(lhs: TypeDescription, rhs: TypeDescription): Boolean = { + lhs.equals(rhs, false) + } } diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala index 6493d2cd3a7..095386d3337 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScanBase.scala @@ -1224,7 +1224,7 @@ private case class GpuOrcFileFilterHandler( newUnion // Primitive types should be equal to each other. case _ => - if (fileSchema != readSchema) { + if (!OrcShims.typeDescriptionEqual(fileSchema, readSchema)) { throw new QueryExecutionException("Incompatible schemas for ORC file" + s" at ${partFile.filePath}\n" + s" file schema: $fileSchema\n" +