Skip to content

Commit

Permalink
add maskrcnn inference example (intel-analytics#2944)
Browse files Browse the repository at this point in the history
* add maskrcnn inference example

* meet pr comments

* add model download url
  • Loading branch information
zhangxiaoli73 committed Oct 28, 2019
1 parent 2861c45 commit b4d930a
Show file tree
Hide file tree
Showing 11 changed files with 294 additions and 92 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -356,13 +356,14 @@ object DataSet {
/**
* Wrap a RDD as a DataSet.
* @param data
* @param partitionNum number of partitions to coalesce the RDD into (default: node number)
* @tparam T
* @return
*/
def rdd[T: ClassTag](data: RDD[T]): DistributedDataSet[T] = {
val nodeNumber = Engine.nodeNumber()
def rdd[T: ClassTag](data: RDD[T], partitionNum: Int = Engine.nodeNumber()
): DistributedDataSet[T] = {
new CachedDistriDataSet[T](
data.coalesce(nodeNumber, true)
data.coalesce(partitionNum, true)
.mapPartitions(iter => {
Iterator.single(iter.toArray)
}).setName("cached dataset")
Expand Down Expand Up @@ -646,7 +647,7 @@ object DataSet {
imf
}
.coalesce(num)
DataSet.rdd(rawData)
DataSet.rdd(rawData, num)
}

private[bigdl] def filesToImageFeatureDataset(url: String, sc: SparkContext,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@
package com.intel.analytics.bigdl.transform.vision.image

import java.util.concurrent.atomic.AtomicInteger

import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
import com.intel.analytics.bigdl.nn.abstractnn.Activity
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.utils.{Engine, T, Table}

import scala.collection.mutable.IndexedSeq
import scala.reflect.ClassTag

Expand Down Expand Up @@ -199,23 +202,26 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
* elements. The inner tensor holds the data for segmentation
* RoiLabel.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float].
* -1: unknown, 0: not crowd, 1: is crowd
* RoiLabel.ORIGSIZE The original size of the image, tuple of (height, width, channels)
* RoiLabel.IMGINFO Info of each image, one row of the (batchSize, 4) Tensor[Float]:
* (height, width, original height, original width)
*/
class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
val isCrowd: IndexedSeq[Tensor[Float]], val originalSizes: IndexedSeq[(Int, Int, Int)])
val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
extends MiniBatch[Float] {

override def size(): Int = {
input.size(1)
}
override def size(): Int = input.size(1)

override def getInput(): Tensor[Float] = input
override def getInput(): Activity = {
if (imageInfo == null) input else T(input, imageInfo)
}

override def getTarget(): Table = {
val tables = (target, isCrowd, originalSizes).zipped.map { case (roiLabel, crowd, size) =>
var i = 0
val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) =>
i += 1
roiLabel.toTable
.update(RoiLabel.ISCROWD, crowd)
.update(RoiLabel.ORIGSIZE, size)
.update(RoiLabel.IMGINFO, imageInfo.select(1, i))
}
T.seq(tables)
}
Expand All @@ -224,7 +230,7 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
val subInput = input.narrow(1, offset, length)
val subTarget = target.view(offset - 1, length) // offset starts from 1
val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1
val subSize = originalSizes.view(offset - 1, length) // offset starts from 1
val subSize = imageInfo.narrow(1, offset, length)
RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
}

Expand All @@ -236,8 +242,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],

object RoiMiniBatch {
def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
isCrowd: IndexedSeq[Tensor[Float]], originalSizes: IndexedSeq[(Int, Int, Int)]):
RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, originalSizes)
isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
}


Expand All @@ -259,7 +265,6 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
private var featureTensor: Tensor[Float] = null

override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
Expand All @@ -270,15 +275,14 @@ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
"in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
isCrowdData(position) = isCrowd
labelData(position) = label
origSizeData(position) = img.getOriginalSize
}

override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
if (featureTensor == null) {
featureTensor = Tensor(Storage[Float](featureData),
storageOffset = 1, size = Array(batchSize, 3, height, width))
}
RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view, origSizeData.view)
RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view)
}
}

Expand All @@ -298,7 +302,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t

private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
private val origSizeData: Array[(Int, Int, Int)] = new Array[(Int, Int, Int)](batchSize)
private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
private var featureTensor: Tensor[Float] = null
private val imageBuffer = new Array[Tensor[Float]](batchSize)

Expand All @@ -324,11 +328,16 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
val label = img.getLabel.asInstanceOf[RoiLabel]
require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
"in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
if (isCrowd != null && label != null) {
require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
"in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
}
isCrowdData(position) = isCrowd
labelData(position) = label
origSizeData(position) = img.getOriginalSize
imgInfoData.setValue(position + 1, 1, img.getHeight())
imgInfoData.setValue(position + 1, 2, img.getWidth())
imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
}

override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
Expand All @@ -341,6 +350,6 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
.narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
}
RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
isCrowdData.view(0, batchSize), origSizeData.view(0, batchSize))
isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -75,20 +75,16 @@ object RoiLabel {
val MASKS = "masks"
// ISCROWD is stored in ImageFeature; IMGINFO (replacing ORIGSIZE) is filled in by RoiMiniBatch
val ISCROWD = "is_crowd"
val ORIGSIZE = "size"
val IMGINFO = "imgInfo"
val SCORES = "scores"

def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)

def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)

/**
* @return (height, width, channel)
*/
def getOrigSize(tab: Table): (Int, Int, Int) =
tab[(Int, Int, Int)](ORIGSIZE)
def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)


def fromTensor(tensor: Tensor[Float]): RoiLabel = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package com.intel.analytics.bigdl.models.maskrcnn

import com.intel.analytics.bigdl.Module
import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, RLEMasks}
import com.intel.analytics.bigdl.models.resnet.{Convolution, Sbn}
import com.intel.analytics.bigdl.nn._
import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
Expand Down Expand Up @@ -78,14 +77,28 @@ class MaskRCNN(val inChannels: Int,
modules.append(boxHead.asInstanceOf[Module[Float]])
modules.append(maskHead.asInstanceOf[Module[Float]])

private def buildResNet50(): Module[Float] = {
private def buildResNet50(): Module[Float] = {

/**
 * Local factory for the convolution layers of the ResNet-50 backbone.
 *
 * Creates a `SpatialConvolution[Float]` without a bias term (`withBias = false`)
 * and configures it with `setInitMethod(MsraFiller(false), Zeros)`.
 *
 * @param nInputPlane   number of input planes, forwarded to SpatialConvolution
 * @param nOutputPlane  number of output planes, forwarded to SpatialConvolution
 * @param kernelW       kernel width
 * @param kernelH       kernel height
 * @param strideW       horizontal stride, default 1
 * @param strideH       vertical stride, default 1
 * @param padW          horizontal padding, default 0
 * @param padH          vertical padding, default 0
 * @param nGroup        group count forwarded to SpatialConvolution, default 1
 * @param propagateBack forwarded to SpatialConvolution, default true
 * @return the configured convolution layer
 */
def convolution (nInputPlane: Int, nOutputPlane: Int, kernelW: Int, kernelH: Int,
strideW: Int = 1, strideH: Int = 1, padW: Int = 0, padH: Int = 0,
nGroup: Int = 1, propagateBack: Boolean = true): SpatialConvolution[Float] = {
val conv = SpatialConvolution[Float](nInputPlane, nOutputPlane, kernelW, kernelH,
strideW, strideH, padW, padH, nGroup, propagateBack, withBias = false)
// NOTE(review): presumably (weight init, bias init) -- confirm against setInitMethod's signature
conv.setInitMethod(MsraFiller(false), Zeros)
conv
}

/**
 * Local factory for the batch-normalization layers of the ResNet-50 backbone.
 *
 * Creates a `SpatialBatchNormalization[Float]` and configures it with
 * `setInitMethod(Ones, Zeros)`.
 *
 * @param nOutput  size argument forwarded to SpatialBatchNormalization
 * @param eps      forwarded to SpatialBatchNormalization, default 1e-3
 * @param momentum forwarded to SpatialBatchNormalization, default 0.1
 * @param affine   forwarded to SpatialBatchNormalization, default true
 * @return the configured batch-normalization layer
 */
def sbn(nOutput: Int, eps: Double = 1e-3, momentum: Double = 0.1, affine: Boolean = true)
: SpatialBatchNormalization[Float] = {
SpatialBatchNormalization[Float](nOutput, eps, momentum, affine).setInitMethod(Ones, Zeros)
}

def shortcut(nInputPlane: Int, nOutputPlane: Int, stride: Int,
useConv: Boolean = false): Module[Float] = {
if (useConv) {
Sequential()
.add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
.add(Sbn(nOutputPlane))
.add(convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride))
.add(sbn(nOutputPlane))
} else {
Identity()
}
Expand All @@ -94,14 +107,14 @@ class MaskRCNN(val inChannels: Int,
def bottleneck(nInputPlane: Int, internalPlane: Int, nOutputPlane: Int,
stride: Int, useConv: Boolean = false): Module[Float] = {
val s = Sequential()
.add(Convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
.add(Sbn(internalPlane))
.add(convolution(nInputPlane, internalPlane, 1, 1, stride, stride, 0, 0))
.add(sbn(internalPlane))
.add(ReLU(true))
.add(Convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
.add(Sbn(internalPlane))
.add(convolution(internalPlane, internalPlane, 3, 3, 1, 1, 1, 1))
.add(sbn(internalPlane))
.add(ReLU(true))
.add(Convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
.add(Sbn(nOutputPlane))
.add(convolution(internalPlane, nOutputPlane, 1, 1, 1, 1, 0, 0))
.add(sbn(nOutputPlane))

val m = Sequential()
.add(ConcatTable()
Expand All @@ -123,8 +136,8 @@ class MaskRCNN(val inChannels: Int,
}

val model = Sequential[Float]()
.add(Convolution(3, 64, 7, 7, 2, 2, 3, 3, optnet = false, propagateBack = false))
.add(Sbn(64))
.add(convolution(3, 64, 7, 7, 2, 2, 3, 3, propagateBack = false))
.add(sbn(64))
.add(ReLU(true))
.add(SpatialMaxPooling(3, 3, 2, 2, 1, 1))

Expand Down Expand Up @@ -164,12 +177,18 @@ class MaskRCNN(val inChannels: Int,
val labelsBox = postProcessorBox[Tensor[Float]](1)
val proposalsBox = postProcessorBox[Table](2)
val scores = postProcessorBox[Tensor[Float]](3)
val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
if (this.isTraining()) {
output = T(proposalsBox, labelsBox, masks, scores)
} else {
output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
scores, imageInfo)
if (labelsBox.size(1) > 0) {
val masks = this.maskHead.forward(T(features, proposalsBox, labelsBox)).toTable
if (this.isTraining()) {
output = T(proposalsBox, labelsBox, masks, scores)
} else {
output = postProcessorForMaskRCNN(proposalsBox, labelsBox, masks[Tensor[Float]](2),
scores, imageInfo)
}
} else { // detect nothing
for (i <- 1 to inputFeatures.size(1)) {
output.toTable(i) = T()
}
}

output
Expand All @@ -196,36 +215,39 @@ class MaskRCNN(val inChannels: Int,

binaryMask.resize(originalHeight, originalWidth)

val boxNumber = boxesInImage(i)
val maskPerImg = masks.narrow(1, start, boxNumber)
val bboxPerImg = bboxes[Tensor[Float]](i + 1)
val classPerImg = labels.narrow(1, start, boxNumber)
val scorePerImg = scores.narrow(1, start, boxNumber)

require(maskPerImg.size(1) == bboxPerImg.size(1),
s"mask number ${maskPerImg.size(1)} should be same with box number ${bboxPerImg.size(1)}")

// bbox resize to original size
if (height != originalHeight || width != originalWidth) {
BboxUtil.scaleBBox(bboxPerImg,
originalHeight.toFloat / height, originalWidth.toFloat / width)
}
// mask decode to original size
val masksRLE = new Array[RLEMasks](boxNumber)
for (j <- 0 to boxNumber - 1) {
binaryMask.fill(0.0f)
Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
binaryMask = binaryMask)
masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
}
start += boxNumber

// prepare for evaluation
val postOutput = T()
postOutput.update(RoiLabel.MASKS, masksRLE)
postOutput.update(RoiLabel.BBOXES, bboxPerImg)
postOutput.update(RoiLabel.CLASSES, classPerImg)
postOutput.update(RoiLabel.SCORES, scorePerImg)

val boxNumber = boxesInImage(i)
if (boxNumber > 0) {
val maskPerImg = masks.narrow(1, start, boxNumber)
val bboxPerImg = bboxes[Tensor[Float]](i + 1)
val classPerImg = labels.narrow(1, start, boxNumber)
val scorePerImg = scores.narrow(1, start, boxNumber)

require(maskPerImg.size(1) == bboxPerImg.size(1), s"mask number ${maskPerImg.size(1)} " +
s"should be the same with box number ${bboxPerImg.size(1)}")

// resize bbox to original size
if (height != originalHeight || width != originalWidth) {
BboxUtil.scaleBBox(bboxPerImg,
originalHeight.toFloat / height, originalWidth.toFloat / width)
}
// decode mask to original size
val masksRLE = new Array[RLEMasks](boxNumber)
for (j <- 0 to boxNumber - 1) {
binaryMask.fill(0.0f)
Utils.decodeMaskInImage(maskPerImg.select(1, j + 1), bboxPerImg.select(1, j + 1),
binaryMask = binaryMask)
masksRLE(j) = MaskUtils.binaryToRLE(binaryMask)
}
start += boxNumber

postOutput.update(RoiLabel.MASKS, masksRLE)
postOutput.update(RoiLabel.BBOXES, bboxPerImg)
postOutput.update(RoiLabel.CLASSES, classPerImg)
postOutput.update(RoiLabel.SCORES, scorePerImg)
}

output(i + 1) = postOutput
}
Expand Down
Loading

0 comments on commit b4d930a

Please sign in to comment.