Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cost-based optimizer: Implement simple cost model that demonstrates benefits with NDS queries #2744

Merged
merged 12 commits into from
Jun 23, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -286,21 +286,9 @@ class CpuCostModel(conf: RapidsConf) extends CostModel {
val rowCount = RowCountPlanVisitor.visit(plan).map(_.toDouble)
.getOrElse(conf.defaultRowCount.toDouble)

val operatorCost = plan.wrapped match {
case _: ProjectExec =>
// this is not accurate because CPU projections do have a cost due to appending values
// to each row that is produced, but this needs to be a really small number because
// GpuProject cost is zero (in our cost model) and we don't want to encourage moving to
// the GPU just to do a trivial projection, so we pretend the overhead of a
// CPU projection (beyond evaluating the expressions) is also zero
0
case _: UnionExec =>
// union does not further process data produced by its children
0
case _ => plan.conf
val operatorCost = plan.conf
.getCpuOperatorCost(plan.wrapped.getClass.getSimpleName)
.getOrElse(conf.defaultCpuOperatorCost) * rowCount
}

val exprEvalCost = plan.childExprs
.map(expr => exprCost(expr.asInstanceOf[BaseExprMeta[Expression]], rowCount))
Expand Down Expand Up @@ -347,18 +335,9 @@ class GpuCostModel(conf: RapidsConf) extends CostModel {
val rowCount = RowCountPlanVisitor.visit(plan).map(_.toDouble)
.getOrElse(conf.defaultRowCount.toDouble)

val operatorCost = plan.wrapped match {
case _: ProjectExec =>
// The cost of a GPU projection is mostly the cost of evaluating the expressions
// to produce the projected columns
0
case _: UnionExec =>
// union does not further process data produced by its children
0
case _ => plan.conf
val operatorCost = plan.conf
.getGpuOperatorCost(plan.wrapped.getClass.getSimpleName)
.getOrElse(conf.defaultGpuOperatorCost) * rowCount
}

val exprEvalCost = plan.childExprs
.map(expr => exprCost(expr.asInstanceOf[BaseExprMeta[Expression]], rowCount))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1609,6 +1609,21 @@ class RapidsConf(conf: Map[String, String]) extends Logging {

lazy val isRangeWindowLongEnabled: Boolean = get(ENABLE_RANGE_WINDOW_LONG)

// Built-in optimizer cost settings, keyed by the full configuration name. Values are kept
// as strings, in the same form as entries of the user-supplied configuration map.
private val optimizerDefaults = {
  // Projections: a CPU projection really does have some overhead from appending values to
  // every row it produces. We still model that overhead as zero, because the GPU projection
  // cost is zero in this cost model and a non-zero CPU cost would encourage moving to the
  // GPU just to run a trivial projection. On the GPU, the cost of a projection is mostly the
  // cost of evaluating the expressions that produce the projected columns.
  val projectCosts = Seq(
    "spark.rapids.sql.optimizer.cpu.exec.ProjectExec" -> "0",
    "spark.rapids.sql.optimizer.gpu.exec.ProjectExec" -> "0")

  // Unions: a union does no further processing of the data produced by its children.
  val unionCosts = Seq(
    "spark.rapids.sql.optimizer.cpu.exec.UnionExec" -> "0",
    "spark.rapids.sql.optimizer.gpu.exec.UnionExec" -> "0")

  (projectCosts ++ unionCosts).toMap
}

def isOperatorEnabled(key: String, incompat: Boolean, isDisabledByDefault: Boolean): Boolean = {
val default = !(isDisabledByDefault || incompat) || (incompat && isIncompatEnabled)
conf.get(key).map(toBoolean(_, key)).getOrElse(default)
Expand Down Expand Up @@ -1647,6 +1662,7 @@ class RapidsConf(conf: Map[String, String]) extends Logging {
}

/**
 * Looks up the optional cost stored under `key`.
 *
 * A value present in the user-supplied `conf` map takes precedence; when the key is absent
 * there, the built-in `optimizerDefaults` map is consulted. Whatever string is found is then
 * parsed with `toDouble`.
 *
 * @param key full configuration key to look up
 * @return the parsed cost, or `None` when neither map contains `key`
 */
private def getOptionalCost(key: String) = {
  conf.get(key)
    .orElse(optimizerDefaults.get(key))
    .map(toDouble(_, key))
}
}