add maskrcnn inference example (intel-analytics#2944)

* add maskrcnn inference example * meet pr comments * add model download url
dding3 · Oct 28, 2019 · 811f063 · 811f063
1 parent bac5997
commit 811f063
Show file tree

Hide file tree

Showing 11 changed files with 294 additions and 92 deletions.
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/DataSet.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dataset/DataSet.scala
@@ -356,13 +356,14 @@ object DataSet {
   /**
    * Wrap a RDD as a DataSet.
    * @param data
+   * @param partitionNum repartition data rdd to partition number, default node number.
    * @tparam T
    * @return
    */
-  def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = {
-    val nodeNumber = Engine.nodeNumber()
+  def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber()
+    ): DistributedDataSet[T] = {
     new CachedDistriDataSet[T](
-      data.coalesce(nodeNumber, true)
+      data.coalesce(partitionNum, true)
         .mapPartitions(iter => {
           Iterator.single(iter.toArray)
         }).setName("cached dataset")
@@ -646,7 +647,7 @@ object DataSet {
           imf
         }
         .coalesce(num)
-     DataSet.rdd(rawData)
+     DataSet.rdd(rawData, num)
     }
 
     private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext,

diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNN.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNN.scala
@@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn
 
 import com.intel.analytics.bigdl.Module
 import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks}
-import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn}
 import com.intel.analytics.bigdl.nn._
 import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
 import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
@@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int,
     modules.append(boxHead.asInstanceOf[Module[Float]])
     modules.append(maskHead.asInstanceOf[Module[Float]])
 
-    private def buildResNet50(): Module[Float] = {
+  private def buildResNet50(): Module[Float] = {
+
+    /**
+     * Builds a bias-free spatial convolution for the ResNet-50 backbone and
+     * configures its initializers through setInitMethod(MsraFiller(false), Zeros).
+     */
+    def convolution(
+      nInputPlane: Int, nOutputPlane: Int,
+      kernelW: Int, kernelH: Int,
+      strideW: Int = 1, strideH: Int = 1,
+      padW: Int = 0, padH: Int = 0,
+      nGroup: Int = 1,
+      propagateBack: Boolean = true): SpatialConvolution[Float] = {
+      val layer = SpatialConvolution[Float](
+        nInputPlane, nOutputPlane, kernelW, kernelH,
+        strideW, strideH, padW, padH,
+        nGroup, propagateBack, withBias = false)
+      layer.setInitMethod(MsraFiller(false), Zeros)
+      layer
+    }
+
+    /**
+     * Builds a spatial batch-normalization layer for the ResNet-50 backbone and
+     * configures its initializers through setInitMethod(Ones, Zeros).
+     */
+    def sbn(
+      nOutput: Int,
+      eps: Double = 1e-3,
+      momentum: Double = 0.1,
+      affine: Boolean = true): SpatialBatchNormalization[Float] = {
+      val bn = SpatialBatchNormalization[Float](nOutput, eps, momentum, affine)
+      bn.setInitMethod(Ones, Zeros)
+    }
 
     def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int,
                  useConv: Boolean = false): Module[Float] = {
       if (useConv) {
         Sequential()
-          .add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
-          .add(Sbn(nOutputPlane))
+          .add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
+          .add(sbn(nOutputPlane))
       } else {
         Identity()
       }
@@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int,
     def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int,
                    stride: Int, useConv: Boolean = false): Module[Float] = {
       val s = Sequential()
-        .add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
-        .add(Sbn(internalPlane))
+        .add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
+        .add(sbn(internalPlane))
         .add(ReLU(true))
-        .add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
-        .add(Sbn(internalPlane))
+        .add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
+        .add(sbn(internalPlane))
         .add(ReLU(true))
-        .add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
-        .add(Sbn(nOutputPlane))
+        .add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
+        .add(sbn(nOutputPlane))
 
       val m = Sequential()
         .add(ConcatTable()
@@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int,
     }
 
     val model = Sequential[Float]()
-      .add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false))
-      .add(Sbn(64))
+      .add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false))
+      .add(sbn(64))
       .add(ReLU(true))
       .add(SpatialMaxPooling(3, 3, 2, 2, 1, 1))
 
@@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int,
     val labelsBox = postProcessorBox[Tensor[Float]](1)
     val proposalsBox = postProcessorBox[Table](2)
     val scores = postProcessorBox[Tensor[Float]](3)
-    val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
-    if (this.isTraining()) {
-      output = T(proposalsBox, labelsBox, masks, scores)
-    } else {
-      output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
-        scores, imageInfo)
+    if (labelsBox.size(1) > 0) {
+      val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
+      if (this.isTraining()) {
+        output = T(proposalsBox, labelsBox, masks, scores)
+      } else {
+        output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
+          scores, imageInfo)
+      }
+    } else { // detect nothing
+      for (i <- 1 to inputFeatures.size(1)) {
+        output.toTable(i) = T()
+      }
     }
 
     output
@@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int,
 
       binaryMask.resize(originalHeight, originalWidth)
 
-      val boxNumber = boxesInImage(i)
-      val maskPerImg = masks.narrow(1, start, boxNumber)
-      val bboxPerImg = bboxes[Tensor[Float]](i + 1)
-      val classPerImg = labels.narrow(1, start, boxNumber)
-      val scorePerImg = scores.narrow(1, start, boxNumber)
-
-      require(maskPerImg.size(1) == bboxPerImg.size(1),
-        s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}")
-
-      // bbox resize to original size
-      if (height != originalHeight || width != originalWidth) {
-        BboxUtil.scaleBBox(bboxPerImg,
-          originalHeight.toFloat / height, originalWidth.toFloat / width)
-      }
-      // mask decode to original size
-      val masksRLE = new Array[RLEMasks](boxNumber)
-      for (j <- 0 to boxNumber - 1) {
-        binaryMask.fill(0.0f)
-        Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
-          binaryMask = binaryMask)
-        masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
-      }
-      start += boxNumber
-
       // prepare for evaluation
       val postOutput = T()
-      postOutput.update(RoiLabel.MASKS, masksRLE)
-      postOutput.update(RoiLabel.BBOXES, bboxPerImg)
-      postOutput.update(RoiLabel.CLASSES, classPerImg)
-      postOutput.update(RoiLabel.SCORES, scorePerImg)
+
+      val boxNumber = boxesInImage(i)
+      if (boxNumber > 0) {
+        val maskPerImg = masks.narrow(1, start, boxNumber)
+        val bboxPerImg = bboxes[Tensor[Float]](i + 1)
+        val classPerImg = labels.narrow(1, start, boxNumber)
+        val scorePerImg = scores.narrow(1, start, boxNumber)
+
+        require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " +
+          s"should be the same with box number ${bboxPerImg.size(1)}")
+
+        // resize bbox to original size
+        if (height != originalHeight || width != originalWidth) {
+          BboxUtil.scaleBBox(bboxPerImg,
+            originalHeight.toFloat / height, originalWidth.toFloat / width)
+        }
+        // decode mask to original size
+        val masksRLE = new Array[RLEMasks](boxNumber)
+        for (j <- 0 to boxNumber - 1) {
+          binaryMask.fill(0.0f)
+          Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
+            binaryMask = binaryMask)
+          masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
+        }
+        start += boxNumber
+
+        postOutput.update(RoiLabel.MASKS, masksRLE)
+        postOutput.update(RoiLabel.BBOXES, bboxPerImg)
+        postOutput.update(RoiLabel.CLASSES, classPerImg)
+        postOutput.update(RoiLabel.SCORES, scorePerImg)
+      }
 
       output(i + 1) = postOutput
     }

diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/README.md b/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/README.md
@@ -0,0 +1,71 @@
+# MaskRCNN
+This example demonstrates how to use BigDL to evaluate the [MaskRCNN](https://arxiv.org/abs/1703.06870) architecture on the COCO dataset.
+
+## Prepare the data
+* First, download the [COCO dataset](http://cocodataset.org/).
+Extract it so that the images and annotations are laid out as follows (using **coco_2017_val** as an example):
+```
+coco
+|_ coco_val2017
+|  |_ <im-1-name>.jpg
+|  |_ ...
+|  |_ <im-N-name>.jpg
+|_ annotations
+   |_ instances_train2017.json
+   |_ ...
+```
+
+* Generate the hadoop sequence files for COCO dataset
+The following command will transform the images and annotations into hadoop sequence files.
+```bash
+java -cp bigdl-VERSION-jar-with-dependencies.jar com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator -f ./coco/coco_val2017 -m ./coco/annotations/instances_val2017.json -p 4 -o ./coco/output
+```
+In the above command:
+* -f: the location of the COCO image files
+* -m: the location of the annotation json file
+* -o: the location where the generated sequence files are written
+* -p: the degree of parallelism used for the conversion
+
+## Data Processing
+Input data are transformed by several pipeline classes, such as ScaleResize, ChannelNormalize, ImageFeatureToBatch, etc.
+
+## Model
+You can download the **pre-trained MaskRCNN model** for BigDL by running:
+```bash
+wget https://bigdlmodels.s3-us-west-2.amazonaws.com/segmentation/bigdl_mask-rcnn_COCO_0.10.0.model
+```
+This MaskRCNN model follows the implementation in [facebookresearch/maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark), and the model backbone is **R-50-FPN**.
+
+## Test the Model
+* Spark standalone, example command
+```bash
+spark-submit \
+--master spark://xxx.xxx.xxx.xxx:xxxx \
+--executor-cores cores_per_executor \
+--total-executor-cores total_cores_for_the_job \
+--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--class com.intel.analytics.bigdl.models.maskrcnn.Test \
+dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--batchSize batch_size \
+-f hdfs://.../coco/val \
+--model modelPath
+```
+* Spark yarn client mode, example command
+```bash
+spark-submit \
+--master yarn \
+--deploy-mode client \
+--executor-cores cores_per_executor \
+--num-executors executors_number \
+--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--class com.intel.analytics.bigdl.models.maskrcnn.Test \
+dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--batchSize batch_size \
+-f hdfs://.../coco/val \
+--model modelPath
+```
+In the above commands:
+* -f: where you put your COCO data, it should be a hdfs folder
+* --model: the model snapshot file
+* --batchSize: The mini-batch size. It is expected that the mini-batch size is a multiple of node_number * core_number.
+* --partitionNum: the partition number, default is node_number * core_number.
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/Test.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/models/maskrcnn/Test.scala
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2016 The BigDL Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.bigdl.models.maskrcnn
+
+import com.intel.analytics.bigdl.models.resnet.Utils.{TestParams, _}
+import com.intel.analytics.bigdl.transform.vision.image._
+import com.intel.analytics.bigdl.transform.vision.image.augmentation._
+import com.intel.analytics.bigdl.utils.{Engine, T}
+import scopt.OptionParser
+import com.intel.analytics.bigdl.dataset.{DataSet, MiniBatch, segmentation}
+import com.intel.analytics.bigdl.nn.Module
+import com.intel.analytics.bigdl.optim.MeanAveragePrecision
+import org.apache.spark.{SparkContext, rdd}
+/**
+ * Command-line entry point that loads a saved model, evaluates it on COCO data
+ * read from Hadoop sequence files, and prints the bounding-box and segmentation
+ * mean-average-precision results.
+ */
+object Test {
+  /**
+   * Command-line options for the evaluation run.
+   *
+   * @param folder       location of the COCO sequence files
+   * @param model        location of the model snapshot to load
+   * @param batchSize    total batch size; it is divided by the node number in `main`
+   * @param partitionNum partition number; a non-positive value makes `main` fall back
+   *                     to node number * core number
+   */
+  case class TestParams(
+     folder: String = "./",
+     model: String = "",
+     batchSize: Int = 2,
+     partitionNum: Int = -1
+   )
+
+  val testParser = new OptionParser[TestParams]("BigDL Mask-RCNN on COCO Test Example") {
+    opt[String]('f', "folder")
+      .text("the location of COCO dataset")
+      .action((x, c) => c.copy(folder = x))
+
+    // The default model path is empty and cannot be loaded, so demand the option
+    // up front and let the parser report a usage error instead of failing later.
+    opt[String]('m', "model")
+      .text("the location of model snapshot")
+      .required()
+      .action((x, c) => c.copy(model = x))
+
+    opt[Int]('b', "batchSize")
+      .text("total batch size")
+      .action((x, c) => c.copy(batchSize = x))
+
+    opt[Int]('p', "partitionNum")
+      .text("partition number")
+      .action((x, c) => c.copy(partitionNum = x))
+  }
+
+  /**
+   * Parses the arguments, runs the evaluation and prints one line per metric.
+   * Nothing is run when argument parsing fails.
+   *
+   * @param args command-line arguments understood by `testParser`
+   */
+  def main(args: Array[String]): Unit = {
+    testParser.parse(args, TestParams()).foreach { param =>
+      val conf = Engine.createSparkConf().setAppName("Test MaskRCNN on COCO")
+        .set("spark.akka.frameSize", 64.toString)
+        .set("spark.task.maxFailures", "1")
+      val sc = new SparkContext(conf)
+      // Stop the context even when loading or evaluation throws.
+      try {
+        Engine.init
+
+        val nodeNumber = Engine.nodeNumber()
+        // Integer division below would otherwise yield a per-node batch size of 0.
+        require(param.batchSize >= nodeNumber,
+          s"batchSize (${param.batchSize}) must be at least the node number ($nodeNumber)")
+
+        val partitionNum =
+          if (param.partitionNum > 0) param.partitionNum
+          else nodeNumber * Engine.coreNumber()
+
+        val rddData = DataSet.SeqFileFolder
+          .filesToRoiImageFrame(param.folder, sc, Some(partitionNum))
+          .toDistributed().data(train = false)
+
+        val transformer = MTImageFeatureToBatchWithResize(
+          sizeDivisible = 32,
+          batchSize = param.batchSize / nodeNumber,
+          transformer =
+            PixelBytesToMat() ->
+              ScaleResize(minSize = 800, maxSize = 1333) ->
+              ChannelNormalize(122.7717f, 115.9465f, 102.9801f) ->
+              MatToTensor[Float](),
+          toRGB = false
+        )
+        val evaluationSet = transformer(rddData)
+
+        val model = Module.loadModule[Float](param.model)
+
+        // NOTE(review): 81 is presumably the 80 COCO categories plus background - confirm.
+        val result = model.evaluate(evaluationSet,
+          Array(MeanAveragePrecision.cocoBBox(81), MeanAveragePrecision.cocoSegmentation(81)))
+        result.foreach(r => println(s"${r._2} is ${r._1}"))
+      } finally {
+        sc.stop()
+      }
+    }
+  }
+}
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BoxHead.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BoxHead.scala
@@ -342,6 +342,11 @@ private[nn] class BoxPostProcessor(
       totalDetections += maxDetection
       outBBoxs[Tensor[Float]](i + 1).resize(maxDetection, 4)
       totalROILables(i + 1) = roilabels
+      boxesInImage(i) = maxDetection
+    }
+    // clear others tensors in output
+    for (i <- (boxesInImage.length + 1) to outBBoxs.length()) {
+      outBBoxs.remove[Tensor[Float]](i)
     }
 
     // resize labels and scores
@@ -353,13 +358,15 @@ private[nn] class BoxPostProcessor(
     var labelsOffset = outLabels.storageOffset() - 1
     var scoresOffset = outScores.storageOffset() - 1
     for (i <- 0 to boxesInImage.length - 1) {
-      val roilabels = totalROILables[Array[RoiLabel]](i + 1)
-      val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
-      val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1
-
-      resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
-      labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
-      scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+      if (boxesInImage(i) > 0) {
+        val roilabels = totalROILables[Array[RoiLabel]](i + 1)
+        val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
+        val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1
+
+        resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
+        labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+        scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+      }
     }
 
     output