Skip to content

Commit

Permalink
Update the RoiLabel and MTImageFeatureToBatch (intel-analytics#2925)
Browse files Browse the repository at this point in the history
* Update the RoiLabel related files from Sequence-file related PR

* var -> val

* Bug fix for curBatchSize < batchSize. toRGB default to false

* add ROISIZE

* update documents

* update documents

* add UT

* fix document
  • Loading branch information
Menooker committed Oct 28, 2019
1 parent 6832581 commit 105e4c1
Show file tree
Hide file tree
Showing 10 changed files with 225 additions and 141 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.image.{LabeledBGRImage, _}
import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCODeserializer}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame}
import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame, RoiImageInfo}
import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, T}
import java.awt.Color
import java.awt.image.{BufferedImage, DataBufferByte}
Expand Down Expand Up @@ -643,7 +643,7 @@ object DataSet {
require(rawdata.length == height * width * 3)
val imf = ImageFeature(rawdata, RoiLabel(labelClasses, bboxes, masks), fileName)
imf(ImageFeature.originalSize) = (height, width, 3)
imf(RoiLabel.ISCROWD) = isCrowd
imf(RoiImageInfo.ISCROWD) = isCrowd
imf
}
.coalesce(num)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,14 @@ import scala.collection.mutable.ArrayBuffer

abstract class SegmentationMasks extends Serializable {
/**
* Convert these masks to their RLE-encoded form.
*
* @return the masks as RLEMasks
*/
def toRLE: RLEMasks

/**
* Get the size of the masks.
*
* @return a pair of (height, width)
*/
def size: (Int, Int)
}

/**
Expand All @@ -40,6 +45,11 @@ class PolyMasks(val poly: Array[Array[Float]], val height: Int, val width: Int)
require(height > 0 && width > 0, "the height and width must > 0 for toRLE")
MaskUtils.mergeRLEs(MaskUtils.poly2RLE(this, height, width), false)
}

/**
* Get the height and width
*/
override def size: (Int, Int) = (height, width)
}

object PolyMasks {
Expand Down Expand Up @@ -69,6 +79,8 @@ class RLEMasks(val counts: Array[Int], val height: Int, val width: Int)
extends SegmentationMasks {
override def toRLE: RLEMasks = this

override def size: (Int, Int) = (height, width)

// cached bbox value
@transient
lazy val bbox: (Float, Float, Float, Float) = MaskUtils.rleToOneBbox(this)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,15 @@
*/
package com.intel.analytics.bigdl.transform.vision.image

import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
import java.util.concurrent.atomic.AtomicInteger

import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
import com.intel.analytics.bigdl.nn.abstractnn.Activity
import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
import com.intel.analytics.bigdl.utils.{Engine, T, Table}

import scala.collection.mutable.IndexedSeq
import scala.reflect.ClassTag

object MTImageFeatureToBatch {
/**
* The transformer from ImageFeature to mini-batches
Expand All @@ -40,7 +37,7 @@ object MTImageFeatureToBatch {
* @return
*/
def apply(width: Int, height: Int, batchSize: Int,
transformer: FeatureTransformer, toRGB: Boolean = true, extractRoi: Boolean = false)
transformer: FeatureTransformer, toRGB: Boolean = false, extractRoi: Boolean = false)
: MTImageFeatureToBatch = {
if (extractRoi) {
new RoiMTImageFeatureToBatch (
Expand Down Expand Up @@ -162,7 +159,7 @@ private class PreFetch extends Transformer[ImageFeature, ImageFeature] {
* @param toRGB if converted to RGB, default format is BGR
*/
class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true)
totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
extends MTImageFeatureToBatch(totalBatchSize, transformer) {

private val frameLength = height * width
Expand All @@ -188,25 +185,92 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
}
}


object RoiImageInfo {
// The string keys used in the per-image target table, plus typed accessors for them.
// the keys in the target table
// fields from RoiLabel
val CLASSES = "classes"
val BBOXES = "bboxes"
val MASKS = "masks"
// ISCROWD and ORIGSIZE are stored in ImageFeature
val ISCROWD = "is_crowd"
val ORIGSIZE = "orig_size"
// key of the detection scores, see getScores
val SCORES = "scores"
// key of the image info tensor, see getImageInfo
val IMGINFO = "imginfo"

/**
* Get the output score tensor from the table.
* (1 x N) tensor for N detections
*
* @param tab the annotation table of one image
* @return the tensor stored under the SCORES key
*/
def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)

/**
* Get the class label tensor from the table, i.e. the category of each detection
* (see the RoiLabel.classes field).
* (1 x N), or (2 x N) Tensor[Float]
*
* @param tab the annotation table of one image
* @return the tensor stored under the CLASSES key
*/
def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)

/**
* Get the bbox tensor from the table. See RoiLabel.bboxes
*
* @param tab the annotation table of one image
* @return the tensor stored under the BBOXES key
*/
def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)

/**
* Get the (optional) mask data from the table. See RoiLabel.masks
*
* @param tab the annotation table of one image
* @return the RLE masks stored under the MASKS key
*/
def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)

/**
* Get the isCrowd tensor from the table. Should be 1 x N vector (N is the # of detections)
*
* @param tab the annotation table of one image
* @return the tensor stored under the ISCROWD key
*/
def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)

/**
* Get the size of the image before resizing
*
* @param tab the annotation table of one image
* @return (height, width, channel)
*/
def getOrigSize(tab: Table): (Int, Int, Int) = tab[(Int, Int, Int)](ORIGSIZE)

/**
* Get the image info tensor from the table. It holds
* (height, width, original height, original width) of the image.
*
* @param tab the annotation table of one image
* @return the tensor stored under the IMGINFO key
*/
def getImageInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)

}
/**
* A batch of images with flattened RoiLabels
* the getTarget() returns a Table with key from 1 to batchSize. Each key in the table is mapped to
* a Table for the annotation of an image in the batch. The annotation table holds the annotation
* info for one image (assume the image has N detections). The annotation table has
*
* Key Value
* RoiLabel.CLASSES the categories for each detections (see RoiLabel.clasees field)
* RoiImageInfo.CLASSES the category of each detection (see the RoiLabel.classes field)
* (1 x N), or (2 x N) Tensor[Float]
* RoiLabel.BBOXES the bboxes, (N x 4) Tensor[Float]
* RoiLabel.MASKS (Optional) the mask data, Array[Tensor[Float]\]. The outer array has N
* RoiImageInfo.BBOXES the bboxes, (N x 4) Tensor[Float]
* RoiImageInfo.MASKS (Optional) the mask data, Array[RLEMasks]. The array has N
* elements, one RLE-encoded segmentation mask per detection
* RoiLabel.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float].
* RoiImageInfo.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float].
* -1: unknown, 0: not crowd, 1: is crowd
* RoiLabel.ImageInfo with shape (batchSize, 4), contains all images info
* RoiImageInfo.IMGINFO a Tensor[Float] with 4 elements holding the info of this image
* (height, width, original height, original width)
*/
class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
class RoiMiniBatch(val input: Tensor[Float], val target: Array[RoiLabel],
val isCrowd: Array[Tensor[Float]], val imageInfo: Tensor[Float] = null)
extends MiniBatch[Float] {

override def size(): Int = input.size(1)
Expand All @@ -216,20 +280,18 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
}

/**
 * Build the target table of this batch.
 *
 * @return a Table keyed from 1 to the number of images; each value is the annotation table
 *         of one image: the fields of its RoiLabel plus RoiImageInfo.ISCROWD and, when
 *         imageInfo was supplied, RoiImageInfo.IMGINFO (the image's row of imageInfo)
 */
override def getTarget(): Table = {
  // `1 to n` supplies the 1-based row index into imageInfo for each image
  val tables = (target, isCrowd, 1 to isCrowd.length).zipped.map { case (roiLabel, crowd, i) =>
    val annotation = roiLabel.toTable.update(RoiImageInfo.ISCROWD, crowd)
    // imageInfo is optional (defaults to null): attach the row only when it exists
    if (imageInfo != null) {
      annotation.update(RoiImageInfo.IMGINFO, imageInfo.select(1, i))
    } else {
      annotation
    }
  }
  T.seq(tables)
}

/**
 * Take a sub-batch of consecutive images.
 *
 * @param offset 1-based index of the first image of the sub-batch
 * @param length number of images in the sub-batch
 * @return a RoiMiniBatch holding the images offset .. offset + length - 1
 */
override def slice(offset: Int, length: Int): MiniBatch[Float] = {
  // offset starts from 1 while the arrays are 0-based
  val from = offset - 1
  // Array.slice takes an exclusive end index, not a length
  val until = from + length
  val subInput = input.narrow(1, offset, length)
  val subTarget = target.slice(from, until)
  val subIsCrowd = isCrowd.slice(from, until)
  // imageInfo is optional (defaults to null), keep it null in the sub-batch
  val subSize = if (imageInfo == null) null else imageInfo.narrow(1, offset, length)
  RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
}
Expand All @@ -241,8 +303,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
}

object RoiMiniBatch {
def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
def apply(data: Tensor[Float], target: Array[RoiLabel],
isCrowd: Array[Tensor[Float]], imageInfo: Tensor[Float] = null):
RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
}

Expand All @@ -258,31 +320,40 @@ object RoiMiniBatch {
* @param toRGB if converted to RGB, default format is BGR
*/
class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
  extends MTImageFeatureToBatch(totalBatchSize, transformer) {

  private val frameLength = height * width
  // number of floats per image: 3 channels of height * width
  private val imageLength = frameLength * 3
  // buffers reused for every batch; one slot / row per image of the batch
  private val featureData: Array[Float] = new Array[Float](batchSize * imageLength)
  private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
  private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
  // each row is (height, width, original height, original width)
  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
  // starts empty; pointed at featureData by createBatch
  private val featureTensor: Tensor[Float] = Tensor[Float]()

  /**
   * Copy one image and its annotations into slot `position` of the batch buffers.
   *
   * @param img      the image; its label must be a RoiLabel and it must carry a
   *                 RoiImageInfo.ISCROWD tensor with one entry per detection
   * @param position 0-based index of the image inside the batch
   */
  override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
    img.copyTo(featureData, position * imageLength, toRGB = toRGB)
    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
    val label = img.getLabel.asInstanceOf[RoiLabel]
    require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections " +
      "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
    isCrowdData(position) = isCrowd
    labelData(position) = label
    // tensor rows are 1-based
    val row = position + 1
    imgInfoData.setValue(row, 1, img.getHeight())
    imgInfoData.setValue(row, 2, img.getWidth())
    imgInfoData.setValue(row, 3, img.getOriginalHeight)
    imgInfoData.setValue(row, 4, img.getOriginalWidth)
  }

  /**
   * Wrap the first `curBatchSize` slots of the buffers into a RoiMiniBatch.
   *
   * @param curBatchSize number of images actually filled in; may be less than batchSize
   * @return the mini-batch backed by the internal buffers
   */
  override protected def createBatch(curBatchSize: Int): MiniBatch[Float] = {
    // re-point the tensor only when the number of images in the batch has changed
    if (featureTensor.nElement() != curBatchSize * imageLength) {
      featureTensor.set(Storage[Float](featureData),
        storageOffset = 1, sizes = Array(curBatchSize, 3, height, width))
    }
    // hand out the buffer itself for a full batch, a truncated copy otherwise
    def arraySlice[E](array: Array[E]): Array[E] = {
      if (array.length == curBatchSize) array else array.slice(0, curBatchSize)
    }
    RoiMiniBatch(featureTensor, arraySlice(labelData), arraySlice(isCrowdData),
      imgInfoData.narrow(1, 1, curBatchSize))
  }
}

Expand Down Expand Up @@ -326,7 +397,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
imageBuffer(position).resize(3, img.getHeight(), img.getWidth())
// save img to buffer
img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
val label = img.getLabel.asInstanceOf[RoiLabel]
if (isCrowd != null && label != null) {
require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
Expand All @@ -345,11 +416,14 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
if (featureTensor == null) featureTensor = Tensor()
featureTensor.resize(batchSize, 3, height, wide).fill(0.0f)
// copy img buffer to feature tensor
for (i <- 0 to (batchSize - 1)) {
for (i <- 0 until batchSize) {
featureTensor.select(1, i + 1).narrow(2, 1, imageBuffer(i).size(2))
.narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
}
RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
def arraySlice[T](array: Array[T]) = {
if (array.length == batchSize) array else array.slice(0, batchSize)
}
RoiMiniBatch(featureTensor, arraySlice(labelData),
arraySlice(isCrowdData), imgInfoData.narrow(1, 1, batchSize))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,17 @@

package com.intel.analytics.bigdl.transform.vision.image.label.roi

import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}
import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, SegmentationMasks, RLEMasks}
import com.intel.analytics.bigdl.tensor.Tensor
import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
import com.intel.analytics.bigdl.utils.{T, Table}
import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}

/**
* image target with classes and bounding boxes
*
* @param classes N (class labels) or 2 * N, the first row is class labels,
* the second row is the "difficult" flags
* @param bboxes N * 4
* @param bboxes N * 4, (xmin, ymin, xmax, ymax)
* @param masks the array of annotation masks of the targets
*/
case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
Expand All @@ -45,8 +45,8 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
s"be equal to the number of mask array ${masks.length}")
}
} else if (classes.nElement() > 0 && classes.dim() == 2) {
require(classes.size(2) == bboxes.size(1), s"the number of classes ${ classes.size(2) }" +
s"should be equal to the number of bounding box numbers ${ bboxes.size(1) }")
require(classes.size(2) == bboxes.size(1), s"the number of classes ${classes.size(2)}" +
s"should be equal to the number of bounding box numbers ${bboxes.size(1)}")
if (masks != null) {
require(classes.size(2) == masks.length, s"the number of classes ${classes.size(2)}" +
s"should be equal to the number of bounding box numbers ${masks.length}")
Expand All @@ -57,10 +57,11 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
def toTable: Table = {
val table = T()
if (masks != null) {
table(RoiLabel.MASKS) = masks.map(_.toRLE)
require(masks.length > 0, "The masks can either be null or a non-empty array")
table(RoiImageInfo.MASKS) = masks.map(_.toRLE)
}
table(RoiLabel.CLASSES) = classes
table(RoiLabel.BBOXES) = bboxes
table(RoiImageInfo.CLASSES) = classes
table(RoiImageInfo.BBOXES) = bboxes
table
}

Expand All @@ -70,23 +71,6 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
}

object RoiLabel {
val CLASSES = "classes"
val BBOXES = "bboxes"
val MASKS = "masks"
// ISCROWD and ORIGSIZE are stored in ImageFeature
val ISCROWD = "is_crowd"
val IMGINFO = "imgInfo"
val SCORES = "scores"


def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)


def fromTensor(tensor: Tensor[Float]): RoiLabel = {
val label = tensor.narrow(2, 1, 2).transpose(1, 2).contiguous()
val rois = tensor.narrow(2, 3, 4)
Expand Down
Loading

0 comments on commit 105e4c1

Please sign in to comment.