diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala index 7c5217546d5..0b491a63c51 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala @@ -356,13 +356,14 @@ object DataSet { /** * Wrap a RDD as a DataSet. * @param data + * @param partitionNum repartition data rdd to partition number, default node number. * @tparam T * @return */ - def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = { - val nodeNumber = Engine.nodeNumber() + def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber() + ): DistributedDataSet[T] = { new CachedDistriDataSet[T]( - data.coalesce(nodeNumber, true) + data.coalesce(partitionNum, true) .mapPartitions(iter => { Iterator.single(iter.toArray) }).setName("cached dataset") @@ -646,7 +647,7 @@ object DataSet { imf } .coalesce(num) - DataSet.rdd(rawData) + DataSet.rdd(rawData, num) } private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext, diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala index 20f1fb409a4..a88741aa6cb 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala @@ -16,11 +16,14 @@ package com.intel.analytics.bigdl.transform.vision.image import java.util.concurrent.atomic.AtomicInteger + import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils} +import 
com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.utils.{Engine, T, Table} + import scala.collection.mutable.IndexedSeq import scala.reflect.ClassTag @@ -199,23 +202,26 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int * elements. The inner tensor holds the data for segmentation * RoiLabel.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float]. * -1: unknown, 0: not crowd, 1: is crowd - * RoiLabel.ORIGSIZE The original size of the image, tuple of (height, width, channels) + * RoiLabel.ImageInfo with shape (batchSize, 4), contains all images info + * (height, width, original height, original width) */ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], - val isCrowd: IndexedSeq[Tensor[Float]], val originalSizes: IndexedSeq[(Int, Int, Int)]) + val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null) extends MiniBatch[Float] { - override def size(): Int = { - input.size(1) - } + override def size(): Int = input.size(1) - override def getInput(): Tensor[Float] = input + override def getInput(): Activity = { + if (imageInfo == null) input else T(input, imageInfo) + } override def getTarget(): Table = { - val tables = (target, isCrowd, originalSizes).zipped.map { case (roiLabel, crowd, size) => + var i = 0 + val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) => + i += 1 roiLabel.toTable .update(RoiLabel.ISCROWD, crowd) - .update(RoiLabel.ORIGSIZE, size) + .update(RoiLabel.IMGINFO, imageInfo.select(1, i)) } T.seq(tables) } @@ -224,7 +230,7 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], val subInput = input.narrow(1, offset, length) val subTarget = target.view(offset - 1, length) // offset starts from 1 
val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1 - val subSize = originalSizes.view(offset - 1, length) // offset starts from 1 + val subSize = imageInfo.narrow(1, offset, length) RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize) } @@ -236,8 +242,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], object RoiMiniBatch { def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel], - isCrowd: IndexedSeq[Tensor[Float]], originalSizes: IndexedSeq[(Int, Int, Int)]): - RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, originalSizes) + isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null): + RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo) } @@ -259,7 +265,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int, private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3) private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize) private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize) - private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize) private var featureTensor: Tensor[Float] = null override protected def processImageFeature(img: ImageFeature, position: Int): Unit = { @@ -270,7 +275,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int, "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel") isCrowdData(position) = isCrowd labelData(position) = label - origSizeData(position) = img.getOriginalSize } override protected def createBatch(batchSize: Int): MiniBatch[Float] = { @@ -278,7 +282,7 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int, featureTensor = Tensor(Storage[Float](featureData), storageOffset = 1, size = Array(batchSize, 3, height, width)) } - RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view, origSizeData.view) + RoiMiniBatch(featureTensor, labelData.view, 
isCrowdData.view) } } @@ -298,7 +302,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize) private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize) - private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize) + private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4) private var featureTensor: Tensor[Float] = null private val imageBuffer = new Array[Tensor[Float]](batchSize) @@ -324,11 +328,16 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB) val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]] val label = img.getLabel.asInstanceOf[RoiLabel] - require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" + - "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel") + if (isCrowd != null && label != null) { + require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" + + "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel") + } isCrowdData(position) = isCrowd labelData(position) = label - origSizeData(position) = img.getOriginalSize + imgInfoData.setValue(position + 1, 1, img.getHeight()) + imgInfoData.setValue(position + 1, 2, img.getWidth()) + imgInfoData.setValue(position + 1, 3, img.getOriginalHeight) + imgInfoData.setValue(position + 1, 4, img.getOriginalWidth) } override protected def createBatch(batchSize: Int): MiniBatch[Float] = { @@ -341,6 +350,6 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t .narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i)) } RoiMiniBatch(featureTensor, labelData.view(0, batchSize), - isCrowdData.view(0, batchSize), origSizeData.view(0, batchSize)) + isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, 
batchSize)) } } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala index 19d3eab5631..9bfd21a9c51 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala @@ -75,20 +75,16 @@ object RoiLabel { val MASKS = "masks" // ISCROWD and ORIGSIZE are stored in ImageFeature val ISCROWD = "is_crowd" - val ORIGSIZE = "size" + val IMGINFO = "imgInfo" val SCORES = "scores" - def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES) + def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES) def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES) def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS) def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD) - - /** - * @return (height, width, channel) - */ - def getOrigSize(tab: Table): (Int, Int, Int) = - tab[(Int, Int, Int)](ORIGSIZE) + def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES) + def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO) def fromTensor(tensor: Tensor[Float]): RoiLabel = { diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala index f7587cae4b1..4d4f1d59fd2 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala @@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn import com.intel.analytics.bigdl.Module import 
com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks} -import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn} import com.intel.analytics.bigdl.nn._ import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule} @@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int, modules.append(boxHead.asInstanceOf[Module[Float]]) modules.append(maskHead.asInstanceOf[Module[Float]]) - private def buildResNet50(): Module[Float] = { + private def buildResNet50(): Module[Float] = { + + def convolution (nInputPlane: Int, nOutputPlane: Int, kernelW: Int, kernelH: Int, + strideW: Int = 1, strideH: Int = 1, padW: Int = 0, padH: Int = 0, + nGroup: Int = 1, propagateBack: Boolean = true): SpatialConvolution[Float] = { + val conv = SpatialConvolution[Float](nInputPlane, nOutputPlane, kernelW, kernelH, + strideW, strideH, padW, padH, nGroup, propagateBack, withBias = false) + conv.setInitMethod(MsraFiller(false), Zeros) + conv + } + + def sbn(nOutput: Int, eps: Double = 1e-3, momentum: Double = 0.1, affine: Boolean = true) + : SpatialBatchNormalization[Float] = { + SpatialBatchNormalization[Float](nOutput, eps, momentum, affine).setInitMethod(Ones, Zeros) + } def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int, useConv: Boolean = false): Module[Float] = { if (useConv) { Sequential() - .add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride)) - .add(Sbn(nOutputPlane)) + .add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride)) + .add(sbn(nOutputPlane)) } else { Identity() } @@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int, def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int, stride: Int, useConv: Boolean = false): Module[Float] = { val s = Sequential() - .add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0)) - .add(Sbn(internalPlane)) + .add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 
0)) + .add(sbn(internalPlane)) .add(ReLU(true)) - .add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1)) - .add(Sbn(internalPlane)) + .add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1)) + .add(sbn(internalPlane)) .add(ReLU(true)) - .add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0)) - .add(Sbn(nOutputPlane)) + .add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0)) + .add(sbn(nOutputPlane)) val m = Sequential() .add(ConcatTable() @@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int, } val model = Sequential[Float]() - .add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false)) - .add(Sbn(64)) + .add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false)) + .add(sbn(64)) .add(ReLU(true)) .add(SpatialMaxPooling(3, 3, 2, 2, 1, 1)) @@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int, val labelsBox = postProcessorBox[Tensor[Float]](1) val proposalsBox = postProcessorBox[Table](2) val scores = postProcessorBox[Tensor[Float]](3) - val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable - if (this.isTraining()) { - output = T(proposalsBox, labelsBox, masks, scores) - } else { - output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2), - scores, imageInfo) + if (labelsBox.size(1) > 0) { + val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable + if (this.isTraining()) { + output = T(proposalsBox, labelsBox, masks, scores) + } else { + output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2), + scores, imageInfo) + } + } else { // detect nothing + for (i <- 1 to inputFeatures.size(1)) { + output.toTable(i) = T() + } } output @@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int, binaryMask.resize(originalHeight, originalWidth) - val boxNumber = boxesInImage(i) - val maskPerImg = masks.narrow(1, start, boxNumber) - val bboxPerImg = bboxes[Tensor[Float]](i + 1) - val classPerImg = 
labels.narrow(1, start, boxNumber) - val scorePerImg = scores.narrow(1, start, boxNumber) - - require(maskPerImg.size(1) == bboxPerImg.size(1), - s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}") - - // bbox resize to original size - if (height != originalHeight || width != originalWidth) { - BboxUtil.scaleBBox(bboxPerImg, - originalHeight.toFloat / height, originalWidth.toFloat / width) - } - // mask decode to original size - val masksRLE = new Array[RLEMasks](boxNumber) - for (j <- 0 to boxNumber - 1) { - binaryMask.fill(0.0f) - Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1), - binaryMask = binaryMask) - masksRLE(j) = MaskUtils.binaryToRLE(binaryMask) - } - start += boxNumber - // prepare for evaluation val postOutput = T() - postOutput.update(RoiLabel.MASKS, masksRLE) - postOutput.update(RoiLabel.BBOXES, bboxPerImg) - postOutput.update(RoiLabel.CLASSES, classPerImg) - postOutput.update(RoiLabel.SCORES, scorePerImg) + + val boxNumber = boxesInImage(i) + if (boxNumber > 0) { + val maskPerImg = masks.narrow(1, start, boxNumber) + val bboxPerImg = bboxes[Tensor[Float]](i + 1) + val classPerImg = labels.narrow(1, start, boxNumber) + val scorePerImg = scores.narrow(1, start, boxNumber) + + require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " + + s"should be the same with box number ${bboxPerImg.size(1)}") + + // resize bbox to original size + if (height != originalHeight || width != originalWidth) { + BboxUtil.scaleBBox(bboxPerImg, + originalHeight.toFloat / height, originalWidth.toFloat / width) + } + // decode mask to original size + val masksRLE = new Array[RLEMasks](boxNumber) + for (j <- 0 to boxNumber - 1) { + binaryMask.fill(0.0f) + Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1), + binaryMask = binaryMask) + masksRLE(j) = MaskUtils.binaryToRLE(binaryMask) + } + start += boxNumber + + postOutput.update(RoiLabel.MASKS, 
masksRLE) + postOutput.update(RoiLabel.BBOXES, bboxPerImg) + postOutput.update(RoiLabel.CLASSES, classPerImg) + postOutput.update(RoiLabel.SCORES, scorePerImg) + } output(i + 1) = postOutput } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md new file mode 100644 index 00000000000..13e2fb189a7 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/README.md @@ -0,0 +1,71 @@ +# MaskRCNN +This example demonstrates how to use BigDL to evaluate the [MaskRCNN](https://arxiv.org/abs/1703.06870) architecture on COCO data + +## Prepare the data +* First, download the [COCO dataset](https://cocodataset.org/#download). +Extract the dataset and get images and annotations like (use **coco_2017_val** as example): +``` +coco +|_ coco_val2017 +| |_ .jpg +| |_ ... +| |_ .jpg +|_ annotations + |_ instances_train2017.json + |_ ... +``` + +* Generate the hadoop sequence files for COCO dataset +The following command will transform the images and annotations into hadoop sequence files. +```bash +java -cp bigdl-VERSION-jar-with-dependencies.jar com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator -f ./coco/coco_val2017 -m ./coco/annotations/instances_val2017.json -p 4 -o ./coco/output +``` +In the above command: +-f: the COCO image files location +-m: the annotation json file location +-o: generated seq files location +-p: the degree of parallelism + +## Data Processing +Input data are transformed by several pipeline classes, such as ScaleResize, ChannelNormalize, ImageFeatureToBatch, etc. + +## Model +You can download **preTrain-MaskRCNN model** for BigDL by running +```bash +wget https://bigdlmodels.s3-us-west-2.amazonaws.com/segmentation/bigdl_mask-rcnn_COCO_0.10.0.model +``` +This MaskRCNN model refers to [facebookresearch/maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark), and the model backbone is **R-50-FPN**. 
+ +## Test the Model +* Spark standalone, example command +```bash +spark-submit \ +--master spark://xxx.xxx.xxx.xxx:xxxx \ +--executor-cores cores_per_executor \ +--total-executor-cores total_cores_for_the_job \ +--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \ +--class com.intel.analytics.bigdl.models.maskrcnn.Test \ +dist/lib/bigdl-VERSION-jar-with-dependencies.jar \ +--batchSize batch_size \ +-f hdfs://.../coco/val \ +--model modelPath +``` +* Spark yarn client mode, example command +```bash +spark-submit \ +--master yarn \ +--deploy-mode client \ +--executor-cores cores_per_executor \ +--num-executors executors_number \ +--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \ +--class com.intel.analytics.bigdl.models.maskrcnn.Test \ +dist/lib/bigdl-VERSION-jar-with-dependencies.jar \ +--batchSize batch_size \ +-f hdfs://.../coco/val \ +--model modelPath +``` +In the above command +* -f: where you put your COCO data, it should be an HDFS folder +* --model: the model snapshot file +* --batchSize: The mini-batch size. It is expected that the mini-batch size is a multiple of node_number * core_number. +* --partitionNum: the partition number, default is node_number * core_number. diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala new file mode 100644 index 00000000000..b4ab3d3c8d4 --- /dev/null +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/Test.scala @@ -0,0 +1,89 @@ +/* + * Copyright 2016 The BigDL Authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.intel.analytics.bigdl.models.maskrcnn + +import com.intel.analytics.bigdl.models.resnet.Utils.{TestParams, _} +import com.intel.analytics.bigdl.transform.vision.image._ +import com.intel.analytics.bigdl.transform.vision.image.augmentation._ +import com.intel.analytics.bigdl.utils.{Engine, T} +import scopt.OptionParser +import com.intel.analytics.bigdl.dataset.{DataSet, MiniBatch, segmentation} +import com.intel.analytics.bigdl.nn.Module +import com.intel.analytics.bigdl.optim.MeanAveragePrecision +import org.apache.spark.{SparkContext, rdd} +object Test { + case class TestParams( + folder: String = "./", + model: String = "", + batchSize: Int = 2, + partitionNum: Int = -1 + ) + + val testParser = new OptionParser[TestParams]("BigDL Mask-RCNN on COCO Test Example") { + opt[String]('f', "folder") + .text("the location of COCO dataset") + .action((x, c) => c.copy(folder = x)) + + opt[String]('m', "model") + .text("the location of model snapshot") + .action((x, c) => c.copy(model = x)) + + opt[Int]('b', "batchSize") + .text("total batch size") + .action((x, c) => c.copy(batchSize = x)) + + opt[Int]('p', "partitionNum") + .text("partition number") + .action((x, c) => c.copy(partitionNum = x)) + } + + def main(args: Array[String]): Unit = { + testParser.parse(args, TestParams()).foreach { param => { + val conf = Engine.createSparkConf().setAppName("Test MaskRCNN on COCO") + .set("spark.akka.frameSize", 64.toString) + .set("spark.task.maxFailures", "1") + val sc = new SparkContext(conf) + Engine.init + + val partitionNum = if 
(param.partitionNum > 0) param.partitionNum + else Engine.nodeNumber() * Engine.coreNumber() + + val rddData = DataSet.SeqFileFolder.filesToRoiImageFrame(param.folder, sc, Some(partitionNum)) + .toDistributed().data(train = false) + + val transformer = MTImageFeatureToBatchWithResize( + sizeDivisible = 32, + batchSize = param.batchSize / Engine.nodeNumber(), + transformer = + PixelBytesToMat() -> + ScaleResize(minSize = 800, maxSize = 1333) -> + ChannelNormalize(122.7717f, 115.9465f, 102.9801f) -> + MatToTensor[Float](), + toRGB = false + ) + val evaluationSet = transformer(rddData) + + val model = Module.loadModule[Float](param.model) + + val result = model.evaluate(evaluationSet, + Array(MeanAveragePrecision.cocoBBox(81), MeanAveragePrecision.cocoSegmentation(81))) + result.foreach(r => println(s"${r._2} is ${r._1}")) + + sc.stop() + }} + } +} diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala index 4d8a591ebff..b502bc7b753 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/BoxHead.scala @@ -342,6 +342,11 @@ private[nn] class BoxPostProcessor( totalDetections += maxDetection outBBoxs[Tensor[Float]](i + 1).resize(maxDetection, 4) totalROILables(i + 1) = roilabels + boxesInImage(i) = maxDetection + } + // clear others tensors in output + for (i <- (boxesInImage.length + 1) to outBBoxs.length()) { + outBBoxs.remove[Tensor[Float]](i) } // resize labels and scores @@ -353,13 +358,15 @@ private[nn] class BoxPostProcessor( var labelsOffset = outLabels.storageOffset() - 1 var scoresOffset = outScores.storageOffset() - 1 for (i <- 0 to boxesInImage.length - 1) { - val roilabels = totalROILables[Array[RoiLabel]](i + 1) - val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array() - val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1 - - 
resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset) - labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1) - scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1) + if (boxesInImage(i) > 0) { + val roilabels = totalROILables[Array[RoiLabel]](i + 1) + val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array() + val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1 + + resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset) + labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1) + scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1) + } } output diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala index df2761888c9..19deb3b5039 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/Pooler.scala @@ -109,10 +109,12 @@ class Pooler[T: ClassTag] ( } else { // for batch support input[Table](2) } + + val batchSize = featureMaps.get[Tensor[Float]](1).get.size(1) var totalNum = 0 val num_channels = featureMaps.get[Tensor[T]](1).get.size(2) val out = T() - for (i <- 0 to roiBatch.length() - 1) { + for (i <- 0 to batchSize - 1) { val rois = roiBatch[Tensor[T]](i + 1) val roi_levels = levelMapping(lvl_min, lvl_max, rois) @@ -147,10 +149,10 @@ class Pooler[T: ClassTag] ( // merge to one tensor output.resize(totalNum, num_channels, resolution, resolution) var start = 1 - for (i <- 0 to roiBatch.length() - 1) { + for (i <- 0 to batchSize - 1) { val tmp = out[Tensor[T]](i + 1) val length = tmp.size(1) - output.narrow(1, start, length).copy(tmp) + if (length > 0) output.narrow(1, start, length).copy(tmp) start += length } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala 
b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala index f4fba43f51d..a79f7d058df 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/nn/RegionProposal.scala @@ -151,6 +151,10 @@ class RegionProposal( // sort selectOverAllLevels(selectorRes, postNmsTopN, bboxNumber, output[Tensor[Float]](b)) } + // clear others tensors in output + for (i <- (batchSize + 1) to output.length()) { + output.remove[Tensor[Float]](i) + } output } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala index 6089286f5ee..de091ecf689 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/Evaluator.scala @@ -88,14 +88,14 @@ class Evaluator[T: ClassTag] private[optim](model: Module[T])(implicit ev: Tenso vMethods: Array[ValidationMethod[T]] ): Array[(ValidationResult, ValidationMethod[T])] = { - val dummyInput = dataset.takeSample(withReplacement = false, num = 1).head.getInput() val rdd = ConversionUtils.coalesce(dataset) val modelBroad = ModelBroadcast[T]().broadcast(rdd.sparkContext, - ConversionUtils.convert(model.evaluate()), dummyInput) + ConversionUtils.convert(model.evaluate())) val otherBroad = rdd.sparkContext.broadcast(vMethods) + rdd.mapPartitions(miniBatch => { - val localModel = modelBroad.value(false, true, dummyInput) + val localModel = modelBroad.value() val localMethod = otherBroad.value miniBatch.map(batch => { val output = localModel.forward(batch.getInput()) diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala index 
adc9bd2e834..191d0df86cc 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dllib/transform/vision/image/MTImageFeatureToBatchSpec.scala @@ -170,7 +170,8 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft miniBatch.foreach(batch => { (batch.size() <= 3) should be (true) - val input = batch.getInput().asInstanceOf[Tensor[Float]] + val inputAll = batch.getInput().asInstanceOf[Table] + val input = inputAll[Tensor[Float]](1) val target = batch.getTarget().asInstanceOf[Table] input.size() should be (Array(batch.size(), 3, 10, 20)) target.length() should be (batch.size()) @@ -179,7 +180,7 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft in should be(expectedOutput) val t = target(i).asInstanceOf[Table] t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2))) - t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3)) + // t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3)) t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4)) t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2)) }