intel-analytics · dding3 · Jul 13, 2020 · Jul 10, 2020 · Jul 10, 2020 · Jul 13, 2020
diff --git a/zoo/pom.xml b/zoo/pom.xml
@@ -66,6 +66,7 @@
                         <configuration>
                             <excludes>
                                 <exclude>**/com/intel/analytics/zoo/serving/**</exclude>
+                                <exclude>**/com/intel/analytics/zoo/pipeline/nnframes/XgboostTrainSpec.scala</exclude>
                             </excludes>
                         </configuration>
                     </plugin>

diff --git a/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/nnframes/NNClassifier.scala b/zoo/src/main/scala/com/intel/analytics/zoo/pipeline/nnframes/NNClassifier.scala
@@ -22,7 +22,8 @@ import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 import com.intel.analytics.bigdl.{Criterion, Module}
 import com.intel.analytics.zoo.feature.common._
 import com.intel.analytics.zoo.pipeline.nnframes.NNModel.NNModelWriter
-import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostHelper, XGBoostRegressionModel}
+import ml.dmlc.xgboost4j.scala.spark.{XGBoostClassificationModel, XGBoostHelper,
+XGBoostRegressor, XGBoostRegressionModel}
 import org.apache.spark.ml.DefaultParamsWriterWrapper
 import org.apache.spark.ml.adapter.SchemaUtils
 import org.apache.spark.ml.feature.VectorAssembler
@@ -362,6 +363,204 @@ object XGBClassifierModel {
   }
 }
 
+/**
+ * Chainable wrapper around xgboost4j-spark's [[XGBoostRegressor]].
+ *
+ * Each `setXxx` method passes its argument unchanged to the same-named setter of the
+ * wrapped estimator and returns this wrapper, so calls can be chained. [[fit]] trains the
+ * wrapped estimator and returns the result wrapped in an [[XGBRegressorModel]].
+ */
+class XGBRegressor () {
+
+  // The wrapped xgboost4j-spark estimator; every setter below writes to it.
+  private val estimator = new XGBoostRegressor()
+
+  /**
+   * Applies `setter` to the wrapped estimator and returns this wrapper.
+   * Whatever the setter itself returns is ignored.
+   */
+  private def configure(setter: XGBoostRegressor => Any): this.type = {
+    setter(estimator)
+    this
+  }
+
+  def setLabelCol(labelColName: String): this.type = configure(_.setLabelCol(labelColName))
+
+  def setFeaturesCol(featuresColName: String): this.type =
+    configure(_.setFeaturesCol(featuresColName))
+
+  /**
+   * Fits the wrapped estimator on `df`.
+   *
+   * @param df training data handed to `XGBoostRegressor.fit`
+   * @return the fitted xgboost model wrapped in an [[XGBRegressorModel]]
+   */
+  def fit(df: DataFrame): XGBRegressorModel = new XGBRegressorModel(estimator.fit(df))
+
+  def setNumRound(value: Int): this.type = configure(_.setNumRound(value))
+
+  def setNumWorkers(value: Int): this.type = configure(_.setNumWorkers(value))
+
+  def setNthread(value: Int): this.type = configure(_.setNthread(value))
+
+  def setSilent(value: Int): this.type = configure(_.setSilent(value))
+
+  def setMissing(value: Float): this.type = configure(_.setMissing(value))
+
+  def setCheckpointPath(value: String): this.type = configure(_.setCheckpointPath(value))
+
+  def setCheckpointInterval(value: Int): this.type = configure(_.setCheckpointInterval(value))
+
+  def setSeed(value: Long): this.type = configure(_.setSeed(value))
+
+  def setEta(value: Double): this.type = configure(_.setEta(value))
+
+  def setGamma(value: Double): this.type = configure(_.setGamma(value))
+
+  def setMaxDepth(value: Int): this.type = configure(_.setMaxDepth(value))
+
+  def setMinChildWeight(value: Double): this.type = configure(_.setMinChildWeight(value))
+
+  def setMaxDeltaStep(value: Double): this.type = configure(_.setMaxDeltaStep(value))
+
+  def setColsampleBytree(value: Double): this.type = configure(_.setColsampleBytree(value))
+
+  def setColsampleBylevel(value: Double): this.type = configure(_.setColsampleBylevel(value))
+
+  def setLambda(value: Double): this.type = configure(_.setLambda(value))
+
+  def setAlpha(value: Double): this.type = configure(_.setAlpha(value))
+
+  def setTreeMethod(value: String): this.type = configure(_.setTreeMethod(value))
+
+  def setGrowPolicy(value: String): this.type = configure(_.setGrowPolicy(value))
+
+  def setMaxBins(value: Int): this.type = configure(_.setMaxBins(value))
+
+  def setMaxLeaves(value: Int): this.type = configure(_.setMaxLeaves(value))
+
+  def setSketchEps(value: Double): this.type = configure(_.setSketchEps(value))
+
+  def setScalePosWeight(value: Double): this.type = configure(_.setScalePosWeight(value))
+
+  def setSampleType(value: String): this.type = configure(_.setSampleType(value))
+
+  def setNormalizeType(value: String): this.type = configure(_.setNormalizeType(value))
+
+  def setRateDrop(value: Double): this.type = configure(_.setRateDrop(value))
+
+  def setSkipDrop(value: Double): this.type = configure(_.setSkipDrop(value))
+
+  def setLambdaBias(value: Double): this.type = configure(_.setLambdaBias(value))
+
+  def setObjective(value: String): this.type = configure(_.setObjective(value))
+
+  def setObjectiveType(value: String): this.type = configure(_.setObjectiveType(value))
+
+  def setSubsample(value: Double): this.type = configure(_.setSubsample(value))
+
+  def setBaseScore(value: Double): this.type = configure(_.setBaseScore(value))
+
+  def setEvalMetric(value: String): this.type = configure(_.setEvalMetric(value))
+
+  def setNumEarlyStoppingRounds(value: Int): this.type =
+    configure(_.setNumEarlyStoppingRounds(value))
+
+  def setMaximizeEvaluationMetrics(value: Boolean): this.type =
+    configure(_.setMaximizeEvaluationMetrics(value))
+}
+
 /**
  * [[XGBRegressorModel]] xgboost wrapper of XGBRegressorModel.
  */

diff --git a/zoo/src/test/scala/com/intel/analytics/zoo/pipeline/nnframes/XgboostTrainSpec.scala b/zoo/src/test/scala/com/intel/analytics/zoo/pipeline/nnframes/XgboostTrainSpec.scala
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2018 Analytics Zoo Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.zoo.pipeline.nnframes
+
+import com.intel.analytics.bigdl.utils.Engine
+import com.intel.analytics.zoo.pipeline.api.keras.ZooSpecHelper
+import org.apache.spark.SparkContext
+import org.apache.spark.ml.feature.{VectorAssembler}
+import org.apache.spark.sql.{SQLContext, SparkSession}
+
+/**
+ * Save/load round-trip test for [[XGBRegressor]] and [[XGBRegressorModel]]: trains on a
+ * small in-memory DataFrame, saves the fitted model, reloads it and checks that both
+ * models produce the same transformed output.
+ */
+class XgboostTrainSpec extends ZooSpecHelper {
+  var sc : SparkContext = _
+  var sqlContext : SQLContext = _
+
+  /**
+   * Set-up hook: creates or reuses a `local[1]` SparkContext and builds an SQLContext on it.
+   */
+  override def doBefore(): Unit = {
+    val conf = Engine.createSparkConf().setAppName("Test XGBRegressor").setMaster("local[1]")
+    sc = SparkContext.getOrCreate(conf)
+    sqlContext = new SQLContext(sc)
+  }
+
+  /**
+   * Tear-down hook: stops the SparkContext if one was created.
+   */
+  override def doAfter(): Unit = {
+    if (sc != null) {
+      sc.stop()
+    }
+  }
+
+  "XGBRegressor train" should "work" in {
+    val spark = SparkSession.builder().getOrCreate()
+    import spark.implicits._
+
+    // Ten feature columns (f1..f10) followed by the regression label.
+    val df = Seq(
+      (1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 1.0f, 2.0f, 4.0f, 8.0f, 3.0f, 116.3668f),
+      (1.0f, 3.0f, 8.0f, 6.0f, 5.0f, 9.0f, 5.0f, 6.0f, 7.0f, 4.0f, 116.367f),
+      (2.0f, 1.0f, 5.0f, 7.0f, 6.0f, 7.0f, 4.0f, 1.0f, 2.0f, 3.0f, 116.367f),
+      (2.0f, 1.0f, 4.0f, 3.0f, 6.0f, 1.0f, 3.0f, 2.0f, 1.0f, 3.0f, 116.3668f)
+    ).toDF("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10", "label")
+
+    val featureCols = Array("f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "f10")
+    val assembledDf = new VectorAssembler()
+      .setInputCols(featureCols)
+      .setOutputCol("features")
+      .transform(df)
+      .select("features", "label")
+      .cache()
+
+    val trainedModel = new XGBRegressor().fit(assembledDf)
+    val expected = trainedModel.transform(assembledDf)
+
+    // A fixed path such as "/tmp/test" is shared by every run and is never cleaned up,
+    // so each run gets its own temporary directory instead.
+    val tmpDir = java.nio.file.Files.createTempDirectory("XgboostTrainSpec").toFile
+    try {
+      // Save into a not-yet-existing child so the outcome does not depend on whether
+      // `save` overwrites an existing location.
+      val modelPath = new java.io.File(tmpDir, "model").getAbsolutePath
+      trainedModel.save(modelPath)
+      val reloaded = XGBRegressorModel.load(modelPath)
+      val actual = reloaded.transform(assembledDf)
+
+      // `except` is a one-directional set difference: check both directions and the
+      // row counts, otherwise an empty `actual` would pass.
+      assert(actual.count() == expected.count())
+      assert(actual.except(expected).count() == 0)
+      assert(expected.except(actual).count() == 0)
+    } finally {
+      deleteRecursively(tmpDir)
+    }
+  }
+
+  /**
+   * Best-effort recursive delete of `file`; anything that cannot be deleted right away is
+   * scheduled for deletion on JVM exit.
+   */
+  private def deleteRecursively(file: java.io.File): Unit = {
+    // listFiles() returns null when `file` is not a directory, hence the Option wrapper.
+    Option(file.listFiles()).foreach(_.foreach(deleteRecursively))
+    if (!file.delete()) {
+      file.deleteOnExit()
+    }
+  }
+}