diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
index 7c5217546d5..0b491a63c51 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
@@ -356,13 +356,14 @@ object DataSet {
/**
* Wrap a RDD as a DataSet.
* @param data
+ * @param partitionNum number of partitions to coalesce data into, default Engine.nodeNumber().
* @tparam T
* @return
*/
- def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = {
- val nodeNumber = Engine.nodeNumber()
+ def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber()
+ ): DistributedDataSet[T] = {
new CachedDistriDataSet[T](
- data.coalesce(nodeNumber, true)
+ data.coalesce(partitionNum, true)
.mapPartitions(iter => {
Iterator.single(iter.toArray)
}).setName("cached dataset")
@@ -646,7 +647,7 @@ object DataSet {
imf
}
.coalesce(num)
- DataSet.rdd(rawData)
+ DataSet.rdd(rawData, num)
}
private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext,
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
index 20f1fb409a4..a88741aa6cb 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
@@ -16,11 +16,14 @@
package com.intel.analytics.bigdl.transform.vision.image
import java.util.concurrent.atomic.AtomicInteger
+
import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
+import com.intel.analytics.bigdl.nn.abstractnn.Activity
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.utils.{Engine, T, Table}
+
import scala.collection.mutable.IndexedSeq
import scala.reflect.ClassTag
@@ -199,23 +202,26 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
* elements. The inner tensor holds the data for segmentation
* RoiLabel.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float].
* -1: unknown, 0: not crowd, 1: is crowd
- * RoiLabel.ORIGSIZE The original size of the image, tuple of (height, width, channels)
+ * RoiLabel.IMGINFO Image info, one row per image of a (batchSize, 4) Tensor[Float]:
+ * (height, width, original height, original width)
*/
class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
- val isCrowd: IndexedSeq[Tensor[Float]], val originalSizes: IndexedSeq[(Int, Int, Int)])
+ val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
extends MiniBatch[Float] {
- override def size(): Int = {
- input.size(1)
- }
+ override def size(): Int = input.size(1)
- override def getInput(): Tensor[Float] = input
+ override def getInput(): Activity = {
+ if (imageInfo == null) input else T(input, imageInfo)
+ }
override def getTarget(): Table = {
- val tables = (target, isCrowd, originalSizes).zipped.map { case (roiLabel, crowd, size) =>
+ var i = 0
+ val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) =>
+ i += 1
roiLabel.toTable
.update(RoiLabel.ISCROWD, crowd)
- .update(RoiLabel.ORIGSIZE, size)
+ .update(RoiLabel.IMGINFO, imageInfo.select(1, i))
}
T.seq(tables)
}
@@ -224,7 +230,7 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
val subInput = input.narrow(1, offset, length)
val subTarget = target.view(offset - 1, length) // offset starts from 1
val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1
- val subSize = originalSizes.view(offset - 1, length) // offset starts from 1
+ val subSize = imageInfo.narrow(1, offset, length)
RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
}
@@ -236,8 +242,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
object RoiMiniBatch {
def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
- isCrowd: IndexedSeq[Tensor[Float]], originalSizes: IndexedSeq[(Int, Int, Int)]):
- RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, originalSizes)
+ isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
+ RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
}
@@ -259,7 +265,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
- private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
private var featureTensor: Tensor[Float] = null
override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
@@ -270,7 +275,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
"in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
isCrowdData(position) = isCrowd
labelData(position) = label
- origSizeData(position) = img.getOriginalSize
}
override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
@@ -278,7 +282,7 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
featureTensor = Tensor(Storage[Float](featureData),
storageOffset = 1, size = Array(batchSize, 3, height, width))
}
- RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view, origSizeData.view)
+ RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view)
}
}
@@ -298,7 +302,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
- private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
+ private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
private var featureTensor: Tensor[Float] = null
private val imageBuffer = new Array[Tensor[Float]](batchSize)
@@ -324,11 +328,16 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
val label = img.getLabel.asInstanceOf[RoiLabel]
- require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
- "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
+ if (isCrowd != null && label != null) {
+ require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
+ "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
+ }
isCrowdData(position) = isCrowd
labelData(position) = label
- origSizeData(position) = img.getOriginalSize
+ imgInfoData.setValue(position + 1, 1, img.getHeight())
+ imgInfoData.setValue(position + 1, 2, img.getWidth())
+ imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
+ imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
}
override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
@@ -341,6 +350,6 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
.narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
}
RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
- isCrowdData.view(0, batchSize), origSizeData.view(0, batchSize))
+ isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
}
}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
index 19d3eab5631..9bfd21a9c51 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
@@ -75,20 +75,16 @@ object RoiLabel {
val MASKS = "masks"
// ISCROWD and ORIGSIZE are stored in ImageFeature
val ISCROWD = "is_crowd"
- val ORIGSIZE = "size"
+ val IMGINFO = "imgInfo"
val SCORES = "scores"
- def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+
def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
-
- /**
- * @return (height, width, channel)
- */
- def getOrigSize(tab: Table): (Int, Int, Int) =
- tab[(Int, Int, Int)](ORIGSIZE)
+ def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+ def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
def fromTensor(tensor: Tensor[Float]): RoiLabel = {
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
index f7587cae4b1..4d4f1d59fd2 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
@@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn
import com.intel.analytics.bigdl.Module
import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks}
-import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn}
import com.intel.analytics.bigdl.nn._
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
@@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int,
modules.append(boxHead.asInstanceOf[Module[Float]])
modules.append(maskHead.asInstanceOf[Module[Float]])
- private def buildResNet50(): Module[Float] = {
+ private def buildResNet50(): Module[Float] = {
+
+ def convolution (nInputPlane: Int, nOutputPlane: Int, kernelW: Int, kernelH: Int,
+ strideW: Int = 1, strideH: Int = 1, padW: Int = 0, padH: Int = 0,
+ nGroup: Int = 1, propagateBack: Boolean = true): SpatialConvolution[Float] = {
+ val conv = SpatialConvolution[Float](nInputPlane, nOutputPlane, kernelW, kernelH,
+ strideW, strideH, padW, padH, nGroup, propagateBack, withBias = false)
+ conv.setInitMethod(MsraFiller(false), Zeros)
+ conv
+ }
+
+ def sbn(nOutput: Int, eps: Double = 1e-3, momentum: Double = 0.1, affine: Boolean = true)
+ : SpatialBatchNormalization[Float] = {
+ SpatialBatchNormalization[Float](nOutput, eps, momentum, affine).setInitMethod(Ones, Zeros)
+ }
def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int,
useConv: Boolean = false): Module[Float] = {
if (useConv) {
Sequential()
- .add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
- .add(Sbn(nOutputPlane))
+ .add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
+ .add(sbn(nOutputPlane))
} else {
Identity()
}
@@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int,
def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int,
stride: Int, useConv: Boolean = false): Module[Float] = {
val s = Sequential()
- .add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
- .add(Sbn(internalPlane))
+ .add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
+ .add(sbn(internalPlane))
.add(ReLU(true))
- .add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
- .add(Sbn(internalPlane))
+ .add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
+ .add(sbn(internalPlane))
.add(ReLU(true))
- .add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
- .add(Sbn(nOutputPlane))
+ .add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
+ .add(sbn(nOutputPlane))
val m = Sequential()
.add(ConcatTable()
@@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int,
}
val model = Sequential[Float]()
- .add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false))
- .add(Sbn(64))
+ .add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false))
+ .add(sbn(64))
.add(ReLU(true))
.add(SpatialMaxPooling(3, 3, 2, 2, 1, 1))
@@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int,
val labelsBox = postProcessorBox[Tensor[Float]](1)
val proposalsBox = postProcessorBox[Table](2)
val scores = postProcessorBox[Tensor[Float]](3)
- val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
- if (this.isTraining()) {
- output = T(proposalsBox, labelsBox, masks, scores)
- } else {
- output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
- scores, imageInfo)
+ if (labelsBox.size(1) > 0) {
+ val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
+ if (this.isTraining()) {
+ output = T(proposalsBox, labelsBox, masks, scores)
+ } else {
+ output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
+ scores, imageInfo)
+ }
+ } else { // detect nothing
+ for (i <- 1 to inputFeatures.size(1)) {
+ output.toTable(i) = T()
+ }
}
output
@@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int,
binaryMask.resize(originalHeight, originalWidth)
- val boxNumber = boxesInImage(i)
- val maskPerImg = masks.narrow(1, start, boxNumber)
- val bboxPerImg = bboxes[Tensor[Float]](i + 1)
- val classPerImg = labels.narrow(1, start, boxNumber)
- val scorePerImg = scores.narrow(1, start, boxNumber)
-
- require(maskPerImg.size(1) == bboxPerImg.size(1),
- s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}")
-
- // bbox resize to original size
- if (height != originalHeight || width != originalWidth) {
- BboxUtil.scaleBBox(bboxPerImg,
- originalHeight.toFloat / height, originalWidth.toFloat / width)
- }
- // mask decode to original size
- val masksRLE = new Array[RLEMasks](boxNumber)
- for (j <- 0 to boxNumber - 1) {
- binaryMask.fill(0.0f)
- Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
- binaryMask = binaryMask)
- masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
- }
- start += boxNumber
-
// prepare for evaluation
val postOutput = T()
- postOutput.update(RoiLabel.MASKS, masksRLE)
- postOutput.update(RoiLabel.BBOXES, bboxPerImg)
- postOutput.update(RoiLabel.CLASSES, classPerImg)
- postOutput.update(RoiLabel.SCORES, scorePerImg)
+
+ val boxNumber = boxesInImage(i)
+ if (boxNumber > 0) {
+ val maskPerImg = masks.narrow(1, start, boxNumber)
+ val bboxPerImg = bboxes[Tensor[Float]](i + 1)
+ val classPerImg = labels.narrow(1, start, boxNumber)
+ val scorePerImg = scores.narrow(1, start, boxNumber)
+
+ require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " +
+ s"should be the same with box number ${bboxPerImg.size(1)}")
+
+ // resize bbox to original size
+ if (height != originalHeight || width != originalWidth) {
+ BboxUtil.scaleBBox(bboxPerImg,
+ originalHeight.toFloat / height, originalWidth.toFloat / width)
+ }
+ // decode mask to original size
+ val masksRLE = new Array[RLEMasks](boxNumber)
+ for (j <- 0 to boxNumber - 1) {
+ binaryMask.fill(0.0f)
+ Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
+ binaryMask = binaryMask)
+ masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
+ }
+ start += boxNumber
+
+ postOutput.update(RoiLabel.MASKS, masksRLE)
+ postOutput.update(RoiLabel.BBOXES, bboxPerImg)
+ postOutput.update(RoiLabel.CLASSES, classPerImg)
+ postOutput.update(RoiLabel.SCORES, scorePerImg)
+ }
output(i + 1) = postOutput
}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md
new file mode 100644
index 00000000000..13e2fb189a7
--- /dev/null
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md
@@ -0,0 +1,71 @@
+# MaskRCNN
+This example demonstrates how to use BigDL to evaluate the [MaskRCNN](https://arxiv.org/abs/1703.06870) architecture on the COCO dataset.
+
+## Prepare the data
+* First, download the [COCO dataset](https://cocodataset.org/#download).
+Extract it so that the images and annotations are laid out as follows (using **coco_2017_val** as an example):
+```
+coco
+|_ coco_val2017
+|  |_ <image-1-name>.jpg
+|  |_ ...
+|  |_ <image-N-name>.jpg
+|_ annotations
+   |_ instances_val2017.json
+   |_ ...
+```
+
+* Generate the Hadoop sequence files for the COCO dataset.
+The following command transforms the images and annotations into Hadoop sequence files.
+```bash
+java -cp bigdl-VERSION-jar-with-dependencies.jar com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator -f ./coco/coco_val2017 -m ./coco/annotations/instances_val2017.json -p 4 -o ./coco/output
+```
+In the above command:
+* -f: the location of the COCO image files
+* -m: the location of the annotation JSON file
+* -o: the output location of the generated sequence files
+* -p: the degree of parallelism
+
+## Data Processing
+Input data are transformed by several pipeline classes, such as ScaleResize, ChannelNormalize, ImageFeatureToBatch, etc.
+
+## Model
+You can download a **pre-trained MaskRCNN model** for BigDL by running
+```bash
+wget https://bigdlmodels.s3-us-west-2.amazonaws.com/segmentation/bigdl_mask-rcnn_COCO_0.10.0.model
+```
+This MaskRCNN model is based on [facebookresearch/maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark), and the model backbone is **R-50-FPN**.
+
+## Test the Model
+* Spark standalone, example command
+```bash
+spark-submit \
+--master spark://xxx.xxx.xxx.xxx:xxxx \
+--executor-cores cores_per_executor \
+--total-executor-cores total_cores_for_the_job \
+--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--class com.intel.analytics.bigdl.models.maskrcnn.Test \
+dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--batchSize batch_size \
+-f hdfs://.../coco/val \
+--model modelPath
+```
+* Spark yarn client mode, example command
+```bash
+spark-submit \
+--master yarn \
+--deploy-mode client \
+--executor-cores cores_per_executor \
+--num-executors executors_number \
+--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--class com.intel.analytics.bigdl.models.maskrcnn.Test \
+dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
+--batchSize batch_size \
+-f hdfs://.../coco/val \
+--model modelPath
+```
+In the above commands:
+* -f: where you put your COCO data, it should be a hdfs folder
+* --model: the model snapshot file
+* --batchSize: The mini-batch size. It is expected that the mini-batch size is a multiple of node_number * core_number.
+* --partitionNum: the partition number, default is node_number * core_number.
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala
new file mode 100644
index 00000000000..b4ab3d3c8d4
--- /dev/null
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2016 The BigDL Authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.intel.analytics.bigdl.models.maskrcnn
+
+import com.intel.analytics.bigdl.models.resnet.Utils.{TestParams, _}
+import com.intel.analytics.bigdl.transform.vision.image._
+import com.intel.analytics.bigdl.transform.vision.image.augmentation._
+import com.intel.analytics.bigdl.utils.{Engine, T}
+import scopt.OptionParser
+import com.intel.analytics.bigdl.dataset.{DataSet, MiniBatch, segmentation}
+import com.intel.analytics.bigdl.nn.Module
+import com.intel.analytics.bigdl.optim.MeanAveragePrecision
+import org.apache.spark.{SparkContext, rdd}
+object Test {
+  /**
+   * Command line options of the Mask-RCNN COCO evaluation example.
+   *
+   * @param folder location of the COCO dataset read by main
+   * @param model location of the model snapshot to evaluate
+   * @param batchSize total batch size; main divides it by Engine.nodeNumber()
+   * @param partitionNum partition number; when not positive, main falls back to
+   *                     Engine.nodeNumber() * Engine.coreNumber()
+   */
+  case class TestParams(
+    folder: String = "./",
+    model: String = "",
+    batchSize: Int = 2,
+    partitionNum: Int = -1
+  )
+
+  val testParser = new OptionParser[TestParams]("BigDL Mask-RCNN on COCO Test Example") {
+    opt[String]('f', "folder")
+      .text("the location of COCO dataset")
+      .action((x, c) => c.copy(folder = x))
+
+    opt[String]('m', "model")
+      .text("the location of model snapshot")
+      .action((x, c) => c.copy(model = x))
+
+    opt[Int]('b', "batchSize")
+      .text("total batch size")
+      .action((x, c) => c.copy(batchSize = x))
+
+    opt[Int]('p', "partitionNum")
+      .text("partition number")
+      .action((x, c) => c.copy(partitionNum = x))
+  }
+
+  /**
+   * Loads the model given by --model, evaluates it on the data under --folder with the
+   * COCO bounding-box and segmentation mean average precision methods, and prints every
+   * result. Nothing is run when the arguments fail to parse.
+   */
+  def main(args: Array[String]): Unit = {
+    testParser.parse(args, TestParams()).foreach { param =>
+      val conf = Engine.createSparkConf().setAppName("Test MaskRCNN on COCO")
+        .set("spark.akka.frameSize", 64.toString)
+        .set("spark.task.maxFailures", "1")
+      val sc = new SparkContext(conf)
+      // Stop the SparkContext even when model loading or evaluation throws.
+      try {
+        Engine.init
+
+        val partitionNum =
+          if (param.partitionNum > 0) param.partitionNum
+          else Engine.nodeNumber() * Engine.coreNumber()
+
+        // The total batch size is divided among the nodes; integer division would
+        // silently yield a per-node batch of 0 when batchSize < node number.
+        val batchSizePerNode = param.batchSize / Engine.nodeNumber()
+        require(batchSizePerNode > 0, s"batchSize (${param.batchSize}) should not be " +
+          s"smaller than the node number (${Engine.nodeNumber()})")
+
+        val rddData = DataSet.SeqFileFolder
+          .filesToRoiImageFrame(param.folder, sc, Some(partitionNum))
+          .toDistributed().data(train = false)
+
+        val transformer = MTImageFeatureToBatchWithResize(
+          sizeDivisible = 32,
+          batchSize = batchSizePerNode,
+          transformer =
+            PixelBytesToMat() ->
+              ScaleResize(minSize = 800, maxSize = 1333) ->
+              ChannelNormalize(122.7717f, 115.9465f, 102.9801f) ->
+              MatToTensor[Float](),
+          toRGB = false
+        )
+        val evaluationSet = transformer(rddData)
+
+        val model = Module.loadModule[Float](param.model)
+
+        val result = model.evaluate(evaluationSet,
+          Array(MeanAveragePrecision.cocoBBox(81), MeanAveragePrecision.cocoSegmentation(81)))
+        result.foreach { case (value, method) => println(s"$method is $value") }
+      } finally {
+        sc.stop()
+      }
+    }
+  }
+}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala
index 4d8a591ebff..b502bc7b753 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala
@@ -342,6 +342,11 @@ private[nn] class BoxPostProcessor(
totalDetections += maxDetection
outBBoxs[Tensor[Float]](i + 1).resize(maxDetection, 4)
totalROILables(i + 1) = roilabels
+ boxesInImage(i) = maxDetection
+ }
+ // clear others tensors in output
+ for (i <- (boxesInImage.length + 1) to outBBoxs.length()) {
+ outBBoxs.remove[Tensor[Float]](i)
}
// resize labels and scores
@@ -353,13 +358,15 @@ private[nn] class BoxPostProcessor(
var labelsOffset = outLabels.storageOffset() - 1
var scoresOffset = outScores.storageOffset() - 1
for (i <- 0 to boxesInImage.length - 1) {
- val roilabels = totalROILables[Array[RoiLabel]](i + 1)
- val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
- val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1
-
- resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
- labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
- scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+ if (boxesInImage(i) > 0) {
+ val roilabels = totalROILables[Array[RoiLabel]](i + 1)
+ val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
+ val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1
+
+ resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
+ labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+ scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
+ }
}
output
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala
index df2761888c9..19deb3b5039 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala
@@ -109,10 +109,12 @@ class Pooler[T: ClassTag] (
} else { // for batch support
input[Table](2)
}
+
+ val batchSize = featureMaps.get[Tensor[Float]](1).get.size(1)
var totalNum = 0
val num_channels = featureMaps.get[Tensor[T]](1).get.size(2)
val out = T()
- for (i <- 0 to roiBatch.length() - 1) {
+ for (i <- 0 to batchSize - 1) {
val rois = roiBatch[Tensor[T]](i + 1)
val roi_levels = levelMapping(lvl_min, lvl_max, rois)
@@ -147,10 +149,10 @@ class Pooler[T: ClassTag] (
// merge to one tensor
output.resize(totalNum, num_channels, resolution, resolution)
var start = 1
- for (i <- 0 to roiBatch.length() - 1) {
+ for (i <- 0 to batchSize - 1) {
val tmp = out[Tensor[T]](i + 1)
val length = tmp.size(1)
- output.narrow(1, start, length).copy(tmp)
+ if (length > 0) output.narrow(1, start, length).copy(tmp)
start += length
}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala
index f4fba43f51d..a79f7d058df 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala
@@ -151,6 +151,10 @@ class RegionProposal(
// sort
selectOverAllLevels(selectorRes, postNmsTopN, bboxNumber, output[Tensor[Float]](b))
}
+ // clear others tensors in output
+ for (i <- (batchSize + 1) to output.length()) {
+ output.remove[Tensor[Float]](i)
+ }
output
}
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala
index 6089286f5ee..de091ecf689 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala
@@ -88,14 +88,14 @@ class Evaluator[T: ClassTag] private[optim](model: Module[T])(implicit ev: Tenso
vMethods: Array[ValidationMethod[T]]
): Array[(ValidationResult, ValidationMethod[T])] = {
- val dummyInput = dataset.takeSample(withReplacement = false, num = 1).head.getInput()
val rdd = ConversionUtils.coalesce(dataset)
val modelBroad = ModelBroadcast[T]().broadcast(rdd.sparkContext,
- ConversionUtils.convert(model.evaluate()), dummyInput)
+ ConversionUtils.convert(model.evaluate()))
val otherBroad = rdd.sparkContext.broadcast(vMethods)
+
rdd.mapPartitions(miniBatch => {
- val localModel = modelBroad.value(false, true, dummyInput)
+ val localModel = modelBroad.value()
val localMethod = otherBroad.value
miniBatch.map(batch => {
val output = localModel.forward(batch.getInput())
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala
index adc9bd2e834..191d0df86cc 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala
@@ -170,7 +170,8 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
miniBatch.foreach(batch => {
(batch.size() <= 3) should be (true)
- val input = batch.getInput().asInstanceOf[Tensor[Float]]
+ val inputAll = batch.getInput().asInstanceOf[Table]
+ val input = inputAll[Tensor[Float]](1)
val target = batch.getTarget().asInstanceOf[Table]
input.size() should be (Array(batch.size(), 3, 10, 20))
target.length() should be (batch.size())
@@ -179,7 +180,7 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
in should be(expectedOutput)
val t = target(i).asInstanceOf[Table]
t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2)))
- t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3))
+ // t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3))
t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4))
t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2))
}