Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better float/double cases for casting tests #1781

Merged
merged 8 commits into from
Feb 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -338,31 +338,31 @@ class AnsiCastOpSuite extends GpuExpressionTestSuite {
frame => testCastTo(DataTypes.DoubleType)(frame)
}

testCastFailsForBadInputs("Test bad cast 1 from strings to floats", badFloatStringsDf,
testCastFailsForBadInputs("Test bad cast 1 from strings to floats", invalidFloatStringsDf,
msg = GpuCast.INVALID_FLOAT_CAST_MSG) {
frame =>frame.select(col("c0").cast(FloatType))
}

testCastFailsForBadInputs("Test bad cast 2 from strings to floats", badFloatStringsDf,
testCastFailsForBadInputs("Test bad cast 2 from strings to floats", invalidFloatStringsDf,
msg = GpuCast.INVALID_FLOAT_CAST_MSG) {
frame =>frame.select(col("c1").cast(FloatType))
}

testCastFailsForBadInputs("Test bad cast 1 from strings to double", badFloatStringsDf,
testCastFailsForBadInputs("Test bad cast 1 from strings to double", invalidFloatStringsDf,
msg = GpuCast.INVALID_FLOAT_CAST_MSG) {
frame =>frame.select(col("c0").cast(DoubleType))
}

testCastFailsForBadInputs("Test bad cast 2 from strings to double", badFloatStringsDf,
testCastFailsForBadInputs("Test bad cast 2 from strings to double", invalidFloatStringsDf,
msg = GpuCast.INVALID_FLOAT_CAST_MSG) {
frame =>frame.select(col("c1").cast(DoubleType))
}

//Currently there is a bug in cudf which doesn't convert one value correctly
// Currently there is a bug in cudf which doesn't convert some corner cases correctly
// The bug is documented here https://github.com/rapidsai/cudf/issues/5225
ignore("Test cast from strings to double that doesn't match") {
testSparkResultsAreEqual("Test cast from strings to double that doesn't match",
badDoubleStringsDf) {
badDoubleStringsDf, conf = sparkConf, maxFloatDiff = 0.0001) {
frame =>frame.select(
col("c0").cast(DoubleType))
}
Expand All @@ -372,12 +372,12 @@ class AnsiCastOpSuite extends GpuExpressionTestSuite {
// Ansi cast from floating point to string
///////////////////////////////////////////////////////////////////////////

ignore("ansi_cast float to string") {
test("ansi_cast float to string") {
testCastToString[Float](DataTypes.FloatType, ansiMode = true,
comparisonFunc = Some(compareStringifiedFloats))
}

ignore("ansi_cast double to string") {
test("ansi_cast double to string") {
testCastToString[Double](DataTypes.DoubleType, ansiMode = true,
comparisonFunc = Some(compareStringifiedFloats))
}
Expand Down
62 changes: 15 additions & 47 deletions tests/src/test/scala/com/nvidia/spark/rapids/CastOpSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -203,16 +203,6 @@ class CastOpSuite extends GpuExpressionTestSuite {
}
}

private def testCastTo(castTo: DataType)(frame: DataFrame): DataFrame ={
frame.withColumn("c1", col("c0").cast(castTo))
}

private def stringDf(str: String)(session: SparkSession): DataFrame = {
import session.sqlContext.implicits._
// use more than one value otherwise spark optimizes it out as a literal
Seq(str, str).toDF("c0")
}

private def castToStringExpectedFun[T]: T => Option[String] = (d: T) => Some(String.valueOf(d))

test("cast byte to string") {
Expand All @@ -231,11 +221,11 @@ class CastOpSuite extends GpuExpressionTestSuite {
testCastToString[Long](DataTypes.LongType)
}

ignore("cast float to string") {
test("cast float to string") {
testCastToString[Float](DataTypes.FloatType, comparisonFunc = Some(compareStringifiedFloats))
}

ignore("cast double to string") {
test("cast double to string") {
testCastToString[Double](DataTypes.DoubleType, comparisonFunc = Some(compareStringifiedFloats))
}

Expand Down Expand Up @@ -298,30 +288,6 @@ class CastOpSuite extends GpuExpressionTestSuite {
col("doubles").cast(TimestampType))
}

ignore("Test cast from double to string") {

//NOTE that the testSparkResultsAreEqual method isn't adequate in this case because we
// need to use a specialized comparison function

val conf = new SparkConf()
.set(RapidsConf.ENABLE_CAST_FLOAT_TO_STRING.key, "true")

val (cpu, gpu) = runOnCpuAndGpu(doubleDf, frame => frame.select(
col("doubles").cast(StringType))
.orderBy(col("doubles")), conf)

val fromCpu = cpu.map(row => row.getAs[String](0))
val fromGpu = gpu.map(row => row.getAs[String](0))

fromCpu.zip(fromGpu).foreach {
case (c, g) =>
if (!compareStringifiedFloats(c, g)) {
fail(s"Running on the GPU and on the CPU did not match: CPU value: $c. " +
s"GPU value: $g.")
}
}
}

testSparkResultsAreEqual("Test cast from boolean", booleanDf) {
frame => frame.select(
col("bools").cast(IntegerType),
Expand Down Expand Up @@ -396,14 +362,6 @@ class CastOpSuite extends GpuExpressionTestSuite {
col("doubles").cast(TimestampType))
}

ignore("Test cast from strings to double that doesn't match") {
testSparkResultsAreEqual("Test cast from strings to double that doesn't match",
badDoubleStringsDf) {
frame =>frame.select(
col("doubles").cast(DoubleType))
}
}

testSparkResultsAreEqual("Test cast from strings to doubles", doublesAsStrings,
conf = sparkConf, maxFloatDiff = 0.0001) {
frame => frame.select(
Expand All @@ -416,7 +374,7 @@ class CastOpSuite extends GpuExpressionTestSuite {
col("c0").cast(FloatType))
}

testSparkResultsAreEqual("Test bad cast from strings to floats", badFloatStringsDf,
testSparkResultsAreEqual("Test bad cast from strings to floats", invalidFloatStringsDf,
conf = sparkConf, maxFloatDiff = 0.0001) {
frame =>frame.select(
col("c0").cast(DoubleType),
Expand All @@ -425,6 +383,16 @@ class CastOpSuite extends GpuExpressionTestSuite {
col("c1").cast(FloatType))
}

// Currently there is a bug in cudf which doesn't convert some corner cases correctly
// The bug is documented here https://github.com/rapidsai/cudf/issues/5225
ignore("Test cast from strings to double that doesn't match") {
testSparkResultsAreEqual("Test cast from strings to double that doesn't match",
badDoubleStringsDf, conf = sparkConf, maxFloatDiff = 0.0001) {
frame => frame.select(
col("c0").cast(DoubleType))
}
}

testSparkResultsAreEqual("ansi_cast string to double exp", exponentsAsStringsDf,
conf = sparkConf, maxFloatDiff = 0.0001) {
frame => frame.select(
Expand Down Expand Up @@ -720,13 +688,13 @@ object CastOpSuite {

def doublesAsStrings(session: SparkSession): DataFrame = {
val schema = FuzzerUtils.createSchema(Seq(DoubleType), false)
val df = FuzzerUtils.generateDataFrame(session, schema, 100)
val df = FuzzerUtils.generateDataFrame(session, schema, 2048)
df.withColumn("c0", col("c0").cast(StringType))
}

def floatsAsStrings(session: SparkSession): DataFrame = {
val schema = FuzzerUtils.createSchema(Seq(FloatType), false)
val df = FuzzerUtils.generateDataFrame(session, schema, 100)
val df = FuzzerUtils.generateDataFrame(session, schema, 2048)
df.withColumn("c0", col("c0").cast(StringType))
}

Expand Down
16 changes: 10 additions & 6 deletions tests/src/test/scala/com/nvidia/spark/rapids/FuzzerUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -301,30 +301,34 @@ class EnhancedRandom(protected val r: Random, protected val options: FuzzerOptio
}

def nextFloat(): Float = {
r.nextInt(9) match {
r.nextInt(11) match {
case 0 => Float.NaN
case 1 => Float.PositiveInfinity
case 2 => Float.NegativeInfinity
case 3 => r.nextFloat() * Float.MinValue
case 4 => r.nextFloat() * Float.MaxValue
case 3 => Float.MinValue
case 4 => Float.MaxValue
case 5 => 0 - r.nextFloat()
case 6 => r.nextFloat()
case 7 => 0f
case 8 => -0f
case 9 => r.nextFloat() * Float.MinValue
case 10 => r.nextFloat() * Float.MaxValue
}
}

def nextDouble(): Double = {
r.nextInt(9) match {
r.nextInt(11) match {
case 0 => Double.NaN
case 1 => Double.PositiveInfinity
case 2 => Double.NegativeInfinity
case 3 => r.nextDouble() * Double.MinValue
case 4 => r.nextDouble() * Double.MaxValue
case 3 => Double.MaxValue
case 4 => Double.MinValue
case 5 => 0 - r.nextDouble()
case 6 => r.nextDouble()
case 7 => 0d
case 8 => -0d
case 9 => r.nextDouble() * Double.MinValue
case 10 => r.nextDouble() * Double.MaxValue
}
}

Expand Down
Loading