add maskrcnn inference example (intel-analytics#2944)

* add maskrcnn inference example * meet pr comments * add model download url
dding3 · Oct 28, 2019 · b4d930a · b4d930a
1 parent 2861c45
commit b4d930a
Show file tree

Hide file tree

Showing 11 changed files with 294 additions and 92 deletions.
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
@@ -356,13 +356,14 @@ object DataSet {
   /**
    * Wrap a RDD as a DataSet.
    * @param data
+   * @param partitionNum number of partitions to repartition the data RDD into; defaults to the node number.
    * @tparam T
    * @return
    */
-  def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = {
-    val nodeNumber = Engine.nodeNumber()
+  def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber()
+    ): DistributedDataSet[T] = {
     new CachedDistriDataSet[T](
-      data.coalesce(nodeNumber, true)
+      data.coalesce(partitionNum, true)
         .mapPartitions(iter => {
           Iterator.single(iter.toArray)
         }).setName("cached dataset")
@@ -646,7 +647,7 @@ object DataSet {
           imf
         }
         .coalesce(num)
-     DataSet.rdd(rawData)
+     DataSet.rdd(rawData, num)
     }
 
     private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext,

diff --git a/...om/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/...om/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
@@ -16,11 +16,14 @@
 package com.intel.analytics.bigdl.transform.vision.image
 
 import java.util.concurrent.atomic.AtomicInteger
+
 import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
+import com.intel.analytics.bigdl.nn.abstractnn.Activity
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.{Engine, T, Table}
+
 import scala.collection.mutable.IndexedSeq
 import scala.reflect.ClassTag
 
@@ -199,23 +202,26 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
  *                    elements. The inner tensor holds the data for segmentation
  * RoiLabel.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
  *                    -1: unknown, 0: not crowd, 1: is crowd
- * RoiLabel.ORIGSIZE  The original size of the image, tuple of (height, width, channels)
+ * RoiLabel.IMGINFO   Tensor[Float] with shape (batchSize, 4) holding the info of every image,
+ *                    one row per image: (height, width, original height, original width)
  */
 class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
-  val isCrowd: IndexedSeq[Tensor[Float]], val originalSizes: IndexedSeq[(Int, Int, Int)])
+  val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
   extends MiniBatch[Float] {
 
-  override def size(): Int = {
-    input.size(1)
-  }
+  override def size(): Int = input.size(1)
 
-  override def getInput(): Tensor[Float] = input
+  override def getInput(): Activity = {
+    if (imageInfo == null) input else T(input, imageInfo)
+  }
 
   override def getTarget(): Table = {
-    val tables = (target, isCrowd, originalSizes).zipped.map { case (roiLabel, crowd, size) =>
+    var i = 0
+    val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) =>
+      i += 1
       roiLabel.toTable
         .update(RoiLabel.ISCROWD, crowd)
-        .update(RoiLabel.ORIGSIZE, size)
+        .update(RoiLabel.IMGINFO, imageInfo.select(1, i))
     }
     T.seq(tables)
   }
@@ -224,7 +230,7 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
     val subInput = input.narrow(1, offset, length)
     val subTarget = target.view(offset - 1, length) // offset starts from 1
     val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1
-    val subSize = originalSizes.view(offset - 1, length) // offset starts from 1
+    val subSize = imageInfo.narrow(1, offset, length)
     RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
   }
 
@@ -236,8 +242,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
 
 object RoiMiniBatch {
   def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
-    isCrowd: IndexedSeq[Tensor[Float]], originalSizes: IndexedSeq[(Int, Int, Int)]):
-  RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, originalSizes)
+    isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
+  RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
 }
 
 
@@ -259,7 +265,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
   private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
   private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
   private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
-  private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
   private var featureTensor: Tensor[Float] = null
 
   override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
@@ -270,15 +275,14 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
       "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
     isCrowdData(position) = isCrowd
     labelData(position) = label
-    origSizeData(position) = img.getOriginalSize
   }
 
   override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
     if (featureTensor == null) {
       featureTensor = Tensor(Storage[Float](featureData),
         storageOffset = 1, size = Array(batchSize, 3, height, width))
     }
-    RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view, origSizeData.view)
+    RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view)
   }
 }
 
@@ -298,7 +302,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
 
   private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
   private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
-  private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
+  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
   private var featureTensor: Tensor[Float] = null
   private val imageBuffer = new Array[Tensor[Float]](batchSize)
 
@@ -324,11 +328,16 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
     img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
     val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
     val label = img.getLabel.asInstanceOf[RoiLabel]
-    require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
-      "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
+    if (isCrowd != null && label != null) {
+      require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
+        "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
+    }
     isCrowdData(position) = isCrowd
     labelData(position) = label
-    origSizeData(position) = img.getOriginalSize
+    imgInfoData.setValue(position + 1, 1, img.getHeight())
+    imgInfoData.setValue(position + 1, 2, img.getWidth())
+    imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
+    imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
   }
 
   override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
@@ -341,6 +350,6 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
         .narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
     }
     RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
-      isCrowdData.view(0, batchSize), origSizeData.view(0, batchSize))
+      isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
   }
 }
diff --git a/...a/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/...a/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
@@ -75,20 +75,16 @@ object RoiLabel {
   val MASKS = "masks"
   // ISCROWD is stored in ImageFeature; IMGINFO is derived from the ImageFeature's sizes
   val ISCROWD = "is_crowd"
-  val ORIGSIZE = "size"
+  val IMGINFO = "imgInfo"
   val SCORES = "scores"
 
-  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+
   def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
   def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
   def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
   def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
-
-  /**
-   * @return (height, width, channel)
-   */
-  def getOrigSize(tab: Table): (Int, Int, Int) =
-    tab[(Int, Int, Int)](ORIGSIZE)
+  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+  def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
 
 
   def fromTensor(tensor: Tensor[Float]): RoiLabel = {

diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
@@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn
 
 import com.intel.analytics.bigdl.Module
 import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks}
-import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn}
 import com.intel.analytics.bigdl.nn._
 import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
 import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
@@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int,
     modules.append(boxHead.asInstanceOf[Module[Float]])
     modules.append(maskHead.asInstanceOf[Module[Float]])
 
-    private def buildResNet50(): Module[Float] = {
+  private def buildResNet50(): Module[Float] = {
+
+    def convolution (nInputPlane: Int, nOutputPlane: Int, kernelW: Int, kernelH: Int,
+      strideW: Int = 1, strideH: Int = 1, padW: Int = 0, padH: Int = 0,
+      nGroup: Int = 1, propagateBack: Boolean = true): SpatialConvolution[Float] = {
+        val conv = SpatialConvolution[Float](nInputPlane, nOutputPlane, kernelW, kernelH,
+          strideW, strideH, padW, padH, nGroup, propagateBack, withBias = false)
+        conv.setInitMethod(MsraFiller(false), Zeros)
+        conv
+      }
+
+    def sbn(nOutput: Int, eps: Double = 1e-3, momentum: Double = 0.1, affine: Boolean = true)
+      : SpatialBatchNormalization[Float] = {
+        SpatialBatchNormalization[Float](nOutput, eps, momentum, affine).setInitMethod(Ones, Zeros)
+      }
 
     def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int,
                  useConv: Boolean = false): Module[Float] = {
       if (useConv) {
         Sequential()
-          .add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
-          .add(Sbn(nOutputPlane))
+          .add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
+          .add(sbn(nOutputPlane))
       } else {
         Identity()
       }
@@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int,
     def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int,
                    stride: Int, useConv: Boolean = false): Module[Float] = {
       val s = Sequential()
-        .add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
-        .add(Sbn(internalPlane))
+        .add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
+        .add(sbn(internalPlane))
         .add(ReLU(true))
-        .add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
-        .add(Sbn(internalPlane))
+        .add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
+        .add(sbn(internalPlane))
         .add(ReLU(true))
-        .add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
-        .add(Sbn(nOutputPlane))
+        .add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
+        .add(sbn(nOutputPlane))
 
       val m = Sequential()
         .add(ConcatTable()
@@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int,
     }
 
     val model = Sequential[Float]()
-      .add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false))
-      .add(Sbn(64))
+      .add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false))
+      .add(sbn(64))
       .add(ReLU(true))
       .add(SpatialMaxPooling(3, 3, 2, 2, 1, 1))
 
@@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int,
     val labelsBox = postProcessorBox[Tensor[Float]](1)
     val proposalsBox = postProcessorBox[Table](2)
     val scores = postProcessorBox[Tensor[Float]](3)
-    val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
-    if (this.isTraining()) {
-      output = T(proposalsBox, labelsBox, masks, scores)
-    } else {
-      output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
-        scores, imageInfo)
+    if (labelsBox.size(1) > 0) {
+      val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
+      if (this.isTraining()) {
+        output = T(proposalsBox, labelsBox, masks, scores)
+      } else {
+        output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
+          scores, imageInfo)
+      }
+    } else { // detect nothing
+      for (i <- 1 to inputFeatures.size(1)) {
+        output.toTable(i) = T()
+      }
     }
 
     output
@@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int,
 
       binaryMask.resize(originalHeight, originalWidth)
 
-      val boxNumber = boxesInImage(i)
-      val maskPerImg = masks.narrow(1, start, boxNumber)
-      val bboxPerImg = bboxes[Tensor[Float]](i + 1)
-      val classPerImg = labels.narrow(1, start, boxNumber)
-      val scorePerImg = scores.narrow(1, start, boxNumber)
-
-      require(maskPerImg.size(1) == bboxPerImg.size(1),
-        s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}")
-
-      // bbox resize to original size
-      if (height != originalHeight || width != originalWidth) {
-        BboxUtil.scaleBBox(bboxPerImg,
-          originalHeight.toFloat / height, originalWidth.toFloat / width)
-      }
-      // mask decode to original size
-      val masksRLE = new Array[RLEMasks](boxNumber)
-      for (j <- 0 to boxNumber - 1) {
-        binaryMask.fill(0.0f)
-        Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
-          binaryMask = binaryMask)
-        masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
-      }
-      start += boxNumber
-
       // prepare for evaluation
       val postOutput = T()
-      postOutput.update(RoiLabel.MASKS, masksRLE)
-      postOutput.update(RoiLabel.BBOXES, bboxPerImg)
-      postOutput.update(RoiLabel.CLASSES, classPerImg)
-      postOutput.update(RoiLabel.SCORES, scorePerImg)
+
+      val boxNumber = boxesInImage(i)
+      if (boxNumber > 0) {
+        val maskPerImg = masks.narrow(1, start, boxNumber)
+        val bboxPerImg = bboxes[Tensor[Float]](i + 1)
+        val classPerImg = labels.narrow(1, start, boxNumber)
+        val scorePerImg = scores.narrow(1, start, boxNumber)
+
+        require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " +
+          s"should be the same with box number ${bboxPerImg.size(1)}")
+
+        // resize bbox to original size
+        if (height != originalHeight || width != originalWidth) {
+          BboxUtil.scaleBBox(bboxPerImg,
+            originalHeight.toFloat / height, originalWidth.toFloat / width)
+        }
+        // decode mask to original size
+        val masksRLE = new Array[RLEMasks](boxNumber)
+        for (j <- 0 to boxNumber - 1) {
+          binaryMask.fill(0.0f)
+          Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
+            binaryMask = binaryMask)
+          masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
+        }
+        start += boxNumber
+
+        postOutput.update(RoiLabel.MASKS, masksRLE)
+        postOutput.update(RoiLabel.BBOXES, bboxPerImg)
+        postOutput.update(RoiLabel.CLASSES, classPerImg)
+        postOutput.update(RoiLabel.SCORES, scorePerImg)
+      }
 
       output(i + 1) = postOutput
     }