Skip to content

Commit

Permalink
add maskrcnn inference example (intel-analytics#2944)
Browse files Browse the repository at this point in the history
* add maskrcnn inference example

* meet pr comments

* add model download url
  • Loading branch information
zhangxiaoli73 committed Oct 28, 2019
1 parent bac5997 commit 811f063
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 92 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,14 @@ object DataSet {
/**
* Wrap a RDD as a DataSet.
* @param data
* @param partitionNum repartition data rdd to partition number, default node number.
* @tparam T
* @return
*/
def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = {
val nodeNumber = Engine.nodeNumber()
def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber()
): DistributedDataSet[T] = {
new CachedDistriDataSet[T](
data.coalesce(nodeNumber, true)
data.coalesce(partitionNum, true)
.mapPartitions(iter => {
Iterator.single(iter.toArray)
}).setName("cached dataset")
Expand Down Expand Up @@ -646,7 +647,7 @@ object DataSet {
imf
}
.coalesce(num)
DataSet.rdd(rawData)
DataSet.rdd(rawData, num)
}

private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn

import com.intel.analytics.bigdl.Module
import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks}
import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn}
import com.intel.analytics.bigdl.nn._
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
Expand Down Expand Up @@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int,
modules.append(boxHead.asInstanceOf[Module[Float]])
modules.append(maskHead.asInstanceOf[Module[Float]])

private def buildResNet50(): Module[Float] = {
private def buildResNet50(): Module[Float] = {

// Builds a SpatialConvolution[Float] with the bias disabled (withBias = false)
// and applies setInitMethod(MsraFiller(false), Zeros) before returning it.
// Stride defaults to 1, padding to 0, group count to 1; propagateBack is
// forwarded unchanged to the layer constructor.
def convolution(nInputPlane: Int, nOutputPlane: Int, kernelW: Int, kernelH: Int,
  strideW: Int = 1, strideH: Int = 1, padW: Int = 0, padH: Int = 0,
  nGroup: Int = 1, propagateBack: Boolean = true): SpatialConvolution[Float] = {
  val layer = SpatialConvolution[Float](
    nInputPlane, nOutputPlane,
    kernelW, kernelH,
    strideW, strideH,
    padW, padH,
    nGroup, propagateBack,
    withBias = false)
  layer.setInitMethod(MsraFiller(false), Zeros)
  layer
}

// Builds a SpatialBatchNormalization[Float] for nOutput planes and returns it
// after applying setInitMethod(Ones, Zeros).
// Defaults: eps = 1e-3, momentum = 0.1, affine = true.
def sbn(nOutput: Int, eps: Double = 1e-3, momentum: Double = 0.1, affine: Boolean = true)
: SpatialBatchNormalization[Float] = {
  val norm = SpatialBatchNormalization[Float](nOutput, eps, momentum, affine)
  norm.setInitMethod(Ones, Zeros)
}

def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int,
useConv: Boolean = false): Module[Float] = {
if (useConv) {
Sequential()
.add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
.add(Sbn(nOutputPlane))
.add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
.add(sbn(nOutputPlane))
} else {
Identity()
}
Expand All @@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int,
def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int,
stride: Int, useConv: Boolean = false): Module[Float] = {
val s = Sequential()
.add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
.add(Sbn(internalPlane))
.add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
.add(sbn(internalPlane))
.add(ReLU(true))
.add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
.add(Sbn(internalPlane))
.add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
.add(sbn(internalPlane))
.add(ReLU(true))
.add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
.add(Sbn(nOutputPlane))
.add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
.add(sbn(nOutputPlane))

val m = Sequential()
.add(ConcatTable()
Expand All @@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int,
}

val model = Sequential[Float]()
.add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false))
.add(Sbn(64))
.add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false))
.add(sbn(64))
.add(ReLU(true))
.add(SpatialMaxPooling(3, 3, 2, 2, 1, 1))

Expand Down Expand Up @@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int,
val labelsBox = postProcessorBox[Tensor[Float]](1)
val proposalsBox = postProcessorBox[Table](2)
val scores = postProcessorBox[Tensor[Float]](3)
val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
if (this.isTraining()) {
output = T(proposalsBox, labelsBox, masks, scores)
} else {
output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
scores, imageInfo)
if (labelsBox.size(1) > 0) {
val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
if (this.isTraining()) {
output = T(proposalsBox, labelsBox, masks, scores)
} else {
output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
scores, imageInfo)
}
} else { // detect nothing
for (i <- 1 to inputFeatures.size(1)) {
output.toTable(i) = T()
}
}

output
Expand All @@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int,

binaryMask.resize(originalHeight, originalWidth)

val boxNumber = boxesInImage(i)
val maskPerImg = masks.narrow(1, start, boxNumber)
val bboxPerImg = bboxes[Tensor[Float]](i + 1)
val classPerImg = labels.narrow(1, start, boxNumber)
val scorePerImg = scores.narrow(1, start, boxNumber)

require(maskPerImg.size(1) == bboxPerImg.size(1),
s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}")

// bbox resize to original size
if (height != originalHeight || width != originalWidth) {
BboxUtil.scaleBBox(bboxPerImg,
originalHeight.toFloat / height, originalWidth.toFloat / width)
}
// mask decode to original size
val masksRLE = new Array[RLEMasks](boxNumber)
for (j <- 0 to boxNumber - 1) {
binaryMask.fill(0.0f)
Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
binaryMask = binaryMask)
masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
}
start += boxNumber

// prepare for evaluation
val postOutput = T()
postOutput.update(RoiLabel.MASKS, masksRLE)
postOutput.update(RoiLabel.BBOXES, bboxPerImg)
postOutput.update(RoiLabel.CLASSES, classPerImg)
postOutput.update(RoiLabel.SCORES, scorePerImg)

val boxNumber = boxesInImage(i)
if (boxNumber > 0) {
val maskPerImg = masks.narrow(1, start, boxNumber)
val bboxPerImg = bboxes[Tensor[Float]](i + 1)
val classPerImg = labels.narrow(1, start, boxNumber)
val scorePerImg = scores.narrow(1, start, boxNumber)

require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " +
s"should be the same with box number ${bboxPerImg.size(1)}")

// resize bbox to original size
if (height != originalHeight || width != originalWidth) {
BboxUtil.scaleBBox(bboxPerImg,
originalHeight.toFloat / height, originalWidth.toFloat / width)
}
// decode mask to original size
val masksRLE = new Array[RLEMasks](boxNumber)
for (j <- 0 to boxNumber - 1) {
binaryMask.fill(0.0f)
Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
binaryMask = binaryMask)
masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
}
start += boxNumber

postOutput.update(RoiLabel.MASKS, masksRLE)
postOutput.update(RoiLabel.BBOXES, bboxPerImg)
postOutput.update(RoiLabel.CLASSES, classPerImg)
postOutput.update(RoiLabel.SCORES, scorePerImg)
}

output(i + 1) = postOutput
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# MaskRCNN
This example demonstrates how to use BigDL to evaluate the [MaskRCNN](https://arxiv.org/abs/1703.06870) architecture on the COCO dataset.

## Prepare the data
* First, download the [COCO dataset](http://cocodataset.org/).
Extract it so that the images and annotations are laid out as follows (using **coco_val2017** as an example):
```
coco
|_ coco_val2017
| |_ <im-1-name>.jpg
| |_ ...
| |_ <im-N-name>.jpg
|_ annotations
|_ instances_val2017.json
|_ ...
```

* Generate the hadoop sequence files for COCO dataset
The following command will transform the images and annotations into hadoop sequence files.
```bash
java -cp bigdl-VERSION-jar-with-dependencies.jar com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator -f ./coco/coco_val2017 -m ./coco/annotations/instances_val2017.json -p 4 -o ./coco/output
```
In the above commands:
-f: the COCO image files location
-m: the annotation json file location
-o: generated seq files location
-p: the degree of parallelism

## Data Processing
Input data are transformed by several pipeline classes, such as ScaleResize, ChannelNormalize, ImageFeatureToBatch, etc.

## Model
You can download the **pre-trained MaskRCNN model** for BigDL by running
```bash
wget https://bigdlmodels.s3-us-west-2.amazonaws.com/segmentation/bigdl_mask-rcnn_COCO_0.10.0.model
```
This MaskRCNN model refers to [facebookresearch/maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark), and the model backbone is **R-50-FPN**.

## Test the Model
* Spark standalone, example command
```bash
spark-submit \
--master spark://xxx.xxx.xxx.xxx:xxxx \
--executor-cores cores_per_executor \
--total-executor-cores total_cores_for_the_job \
--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
--class com.intel.analytics.bigdl.models.maskrcnn.Test \
dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
--batchSize batch_size \
-f hdfs://.../coco/val \
--model modelPath
```
* Spark yarn client mode, example command
```bash
spark-submit \
--master yarn \
--deploy-mode client \
--executor-cores cores_per_executor \
--num-executors executors_number \
--driver-class-path dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
--class com.intel.analytics.bigdl.models.maskrcnn.Test \
dist/lib/bigdl-VERSION-jar-with-dependencies.jar \
--batchSize batch_size \
-f hdfs://.../coco/val \
--model modelPath
```
In the above commands:
* -f: where you put your COCO data, it should be a hdfs folder
* --model: the model snapshot file
* --batchSize: The mini-batch size. It is expected that the mini-batch size is a multiple of node_number * core_number.
* --partitionNum: the partition number, default is node_number * core_number.
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/*
* Copyright 2016 The BigDL Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.intel.analytics.bigdl.models.maskrcnn

import com.intel.analytics.bigdl.models.resnet.Utils.{TestParams, _}
import com.intel.analytics.bigdl.transform.vision.image._
import com.intel.analytics.bigdl.transform.vision.image.augmentation._
import com.intel.analytics.bigdl.utils.{Engine, T}
import scopt.OptionParser
import com.intel.analytics.bigdl.dataset.{DataSet, MiniBatch, segmentation}
import com.intel.analytics.bigdl.nn.Module
import com.intel.analytics.bigdl.optim.MeanAveragePrecision
import org.apache.spark.{SparkContext, rdd}
/**
 * Command-line example that loads a saved BigDL model and evaluates it on COCO
 * data stored as Hadoop sequence files, printing the bounding-box and
 * segmentation mean-average-precision results.
 */
object Test {
  /**
   * Command-line options of the example.
   *
   * @param folder       location of the COCO sequence files
   * @param model        location of the model snapshot to load
   * @param batchSize    total batch size; it is divided by the node number
   *                     before being handed to the batching transformer
   * @param partitionNum partition number; any value <= 0 means
   *                     "use node number * core number"
   */
  case class TestParams(
    folder: String = "./",
    model: String = "",
    batchSize: Int = 2,
    partitionNum: Int = -1
  )

  /** scopt parser that fills a [[TestParams]] from the command line. */
  val testParser = new OptionParser[TestParams]("BigDL Mask-RCNN on COCO Test Example") {
    opt[String]('f', "folder")
      .text("the location of COCO dataset")
      .action((x, c) => c.copy(folder = x))

    opt[String]('m', "model")
      .text("the location of model snapshot")
      .action((x, c) => c.copy(model = x))

    opt[Int]('b', "batchSize")
      .text("total batch size")
      .action((x, c) => c.copy(batchSize = x))

    opt[Int]('p', "partitionNum")
      .text("partition number")
      .action((x, c) => c.copy(partitionNum = x))
  }

  /**
   * Parses the arguments, reads the dataset, runs the evaluation and prints
   * one line per metric. Nothing is run when argument parsing fails.
   *
   * @param args command-line arguments, see [[testParser]]
   * @throws IllegalArgumentException if no model path is given or the total
   *                                  batch size is smaller than the node number
   */
  def main(args: Array[String]): Unit = {
    testParser.parse(args, TestParams()).foreach { param =>
      // Fail fast, before a Spark application is started, when there is no model to load.
      require(param.model.nonEmpty, "the model snapshot location must be given with --model")

      val conf = Engine.createSparkConf().setAppName("Test MaskRCNN on COCO")
        .set("spark.akka.frameSize", 64.toString)
        .set("spark.task.maxFailures", "1")
      val sc = new SparkContext(conf)
      try {
        Engine.init

        val nodeNumber = Engine.nodeNumber()
        val partitionNum =
          if (param.partitionNum > 0) param.partitionNum
          else nodeNumber * Engine.coreNumber()

        // Integer division: a total batch size below the node number would
        // silently become a per-node batch size of 0.
        val batchPerNode = param.batchSize / nodeNumber
        require(batchPerNode > 0,
          s"total batch size ${param.batchSize} must be at least the node number $nodeNumber")

        val rddData = DataSet.SeqFileFolder
          .filesToRoiImageFrame(param.folder, sc, Some(partitionNum))
          .toDistributed().data(train = false)

        // NOTE(review): the normalisation means together with toRGB = false
        // presumably follow the channel order the model was trained with - confirm.
        val transformer = MTImageFeatureToBatchWithResize(
          sizeDivisible = 32,
          batchSize = batchPerNode,
          transformer =
            PixelBytesToMat() ->
              ScaleResize(minSize = 800, maxSize = 1333) ->
              ChannelNormalize(122.7717f, 115.9465f, 102.9801f) ->
              MatToTensor[Float](),
          toRGB = false
        )
        val evaluationSet = transformer(rddData)

        val model = Module.loadModule[Float](param.model)

        // NOTE(review): 81 is presumably the class count including background - confirm.
        val result = model.evaluate(evaluationSet,
          Array(MeanAveragePrecision.cocoBBox(81), MeanAveragePrecision.cocoSegmentation(81)))
        result.foreach(r => println(s"${r._2} is ${r._1}"))
      } finally {
        // Always release the SparkContext, even when loading or evaluation fails.
        sc.stop()
      }
    }
  }
}
21 changes: 14 additions & 7 deletions spark/dl/src/main/scala/com/intel/analytics/bigdl/nn/BoxHead.scala
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,11 @@ private[nn] class BoxPostProcessor(
totalDetections += maxDetection
outBBoxs[Tensor[Float]](i + 1).resize(maxDetection, 4)
totalROILables(i + 1) = roilabels
boxesInImage(i) = maxDetection
}
// clear others tensors in output
for (i <- (boxesInImage.length + 1) to outBBoxs.length()) {
outBBoxs.remove[Tensor[Float]](i)
}

// resize labels and scores
Expand All @@ -353,13 +358,15 @@ private[nn] class BoxPostProcessor(
var labelsOffset = outLabels.storageOffset() - 1
var scoresOffset = outScores.storageOffset() - 1
for (i <- 0 to boxesInImage.length - 1) {
val roilabels = totalROILables[Array[RoiLabel]](i + 1)
val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1

resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
if (boxesInImage(i) > 0) {
val roilabels = totalROILables[Array[RoiLabel]](i + 1)
val bbox = outBBoxs[Tensor[Float]](i + 1).storage().array()
val bboxOffset = outBBoxs[Tensor[Float]](i + 1).storageOffset() - 1

resultToTensor(roilabels, labels, labelsOffset, bbox, bboxOffset, scores, scoresOffset)
labelsOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
scoresOffset += outBBoxs[Tensor[Float]](i + 1).size(1)
}
}

output
Expand Down
Loading

0 comments on commit 811f063

Please sign in to comment.