Update the RoiLabel and MTImageFeatureToBatch (intel-analytics#2925)

* Update the RoiLabel related files from Sequence-file related PR * var -> val * Bug fix for curBatchSize < batchSize. toRGB default to false * add ROISIZE * update documents * update documents * add UT * fix document
dding3 · Oct 28, 2019 · 105e4c1 · 105e4c1
1 parent 6832581
commit 105e4c1
Show file tree

Hide file tree

Showing 10 changed files with 225 additions and 141 deletions.
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
@@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.image.{LabeledBGRImage, _}
 import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCODeserializer}
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
-import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame}
+import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame, RoiImageInfo}
 import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, T}
 import java.awt.Color
 import java.awt.image.{BufferedImage, DataBufferByte}
@@ -643,7 +643,7 @@ object DataSet {
           require(rawdata.length == height * width * 3)
           val imf = ImageFeature(rawdata, RoiLabel(labelClasses, bboxes, masks), fileName)
           imf(ImageFeature.originalSize) = (height, width, 3)
-          imf(RoiLabel.ISCROWD) = isCrowd
+          imf(RoiImageInfo.ISCROWD) = isCrowd
           imf
         }
         .coalesce(num)

diff --git a/...c/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala b/...c/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala
@@ -22,9 +22,14 @@ import scala.collection.mutable.ArrayBuffer
 
 abstract class SegmentationMasks extends Serializable {
   /**
-   * Convert to a RLE encoded tensor
+   * Convert to a RLE encoded masks
    */
   def toRLE: RLEMasks
+
+  /**
+   * Get the height and width
+   */
+  def size: (Int, Int)
 }
 
 /**
@@ -40,6 +45,11 @@ class PolyMasks(val poly: Array[Array[Float]], val height: Int, val width: Int)
     require(height > 0 && width > 0, "the height and width must > 0 for toRLE")
     MaskUtils.mergeRLEs(MaskUtils.poly2RLE(this, height, width), false)
   }
+
+  /**
+   * Get the height and width
+   */
+  override def size: (Int, Int) = (height, width)
 }
 
 object PolyMasks {
@@ -69,6 +79,8 @@ class RLEMasks(val counts: Array[Int], val height: Int, val width: Int)
   extends SegmentationMasks {
   override def toRLE: RLEMasks = this
 
+  override def size: (Int, Int) = (height, width)
+
   // cached bbox value
   @transient
   lazy val bbox: (Float, Float, Float, Float) = MaskUtils.rleToOneBbox(this)

diff --git a/...om/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/...om/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
@@ -15,18 +15,15 @@
  */
 package com.intel.analytics.bigdl.transform.vision.image
 
+import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
 import java.util.concurrent.atomic.AtomicInteger
-
 import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
 import com.intel.analytics.bigdl.nn.abstractnn.Activity
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.{Engine, T, Table}
 
-import scala.collection.mutable.IndexedSeq
-import scala.reflect.ClassTag
-
 object MTImageFeatureToBatch {
   /**
    * The transformer from ImageFeature to mini-batches
@@ -40,7 +37,7 @@ object MTImageFeatureToBatch {
    * @return
    */
   def apply(width: Int, height: Int, batchSize: Int,
-            transformer: FeatureTransformer, toRGB: Boolean = true, extractRoi: Boolean = false)
+            transformer: FeatureTransformer, toRGB: Boolean = false, extractRoi: Boolean = false)
   : MTImageFeatureToBatch = {
     if (extractRoi) {
       new RoiMTImageFeatureToBatch (
@@ -162,7 +159,7 @@ private class PreFetch extends Transformer[ImageFeature, ImageFeature] {
  * @param toRGB  if converted to RGB, default format is BGR
  */
 class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
-  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true)
+  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
   extends MTImageFeatureToBatch(totalBatchSize, transformer) {
 
   private val frameLength = height * width
@@ -188,25 +185,92 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
   }
 }
 
+
+object RoiImageInfo {
+  // the keys in the target table
+  // fields from RoiLabel
+  val CLASSES = "classes"
+  val BBOXES = "bboxes"
+  val MASKS = "masks"
+  // ISCROWD and ORIGSIZE are stored in ImageFeature
+  val ISCROWD = "is_crowd"
+  val ORIGSIZE = "orig_size"
+  val SCORES = "scores"
+  val IMGINFO = "imginfo"
+
+  /**
+   * Get the output score tensor from the table.
+   *    (1 x N) tensor for N detections
+   *
+   * @param tab the table to read; the value is looked up under the SCORES key
+   * @return the tensor stored under SCORES
+   */
+  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+
+  /**
+   * Get the class label tensor from the table. See RoiLabel.classes
+   *    the categories for each detection (see RoiLabel.classes field)
+   *    (1 x N), or (2 x N) Tensor[Float]
+   *
+   * @param tab the table to read; the value is looked up under the CLASSES key
+   * @return the tensor stored under CLASSES
+   */
+  def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
+
+  /**
+   * Get the bbox tensor from the table. See RoiLabel.bboxes
+   * @param tab the table to read; the value is looked up under the BBOXES key
+   * @return the tensor stored under BBOXES
+   */
+  def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
+
+  /**
+   * Get the (optional) mask data from the table. See RoiLabel.masks
+   * @param tab the table to read; the value is looked up under the MASKS key
+   * @return the RLE masks stored under MASKS
+   */
+  def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
+
+  /**
+   * Get the isCrowd tensor from the table. Should be 1 x N vector (N is the # of detections)
+   * @param tab the table to read; the value is looked up under the ISCROWD key
+   * @return the tensor stored under ISCROWD
+   */
+  def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
+
+  /**
+   * Get the size of the image before resizing
+   * @param tab the table to read; the value is looked up under the ORIGSIZE key
+   * @return (height, width, channel)
+   */
+  def getOrigSize(tab: Table): (Int, Int, Int) = tab[(Int, Int, Int)](ORIGSIZE)
+
+  /**
+   * Get the image info tensor from the table. RoiMiniBatch.getTarget stores one row of the
+   * batch's image-info tensor here: (height, width, original height, original width)
+   * @param tab the table to read; the value is looked up under the IMGINFO key
+   * @return the tensor stored under IMGINFO
+   */
+  def getImageInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
+
+}
 /**
  * A batch of images with flattened RoiLabels
  * the getTarget() returns a Table with key from 1 to batchSize. Each key in the table is mapped to
  * a Table for the annotation of an image in the batch. The annotation table holds the annotation
  * info for one image (assume the image has N detections). The annotation table has
  *
  * Key                Value
- * RoiLabel.CLASSES   the categories for each detections (see RoiLabel.clasees field)
+ * RoiImageInfo.CLASSES   the categories for each detection (see RoiLabel.classes field)
  *                    (1 x N), or (2 x N) Tensor[Float]
- * RoiLabel.BBOXES    the bboxes, (N x 4) Tensor[Float]
- * RoiLabel.MASKS     (Optional) the mask data, Array[Tensor[Float]\]. The outer array has N
+ * RoiImageInfo.BBOXES    the bboxes, (N x 4) Tensor[Float]
+ * RoiImageInfo.MASKS     (Optional) the mask data, Array[RLEMasks]. The outer array has N
  *                    elements. Each element holds the RLE-encoded mask of one detection
- * RoiLabel.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
+ * RoiImageInfo.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
  *                    -1: unknown, 0: not crowd, 1: is crowd
- * RoiLabel.ImageInfo  with shape (batchSize, 4), contains all images info
+ * RoiImageInfo.IMGINFO  with shape (batchSize, 4), contains all images info
  *                 (height, width, original height, original width)
  */
-class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
-  val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
+class RoiMiniBatch(val input: Tensor[Float], val target: Array[RoiLabel],
+  val isCrowd: Array[Tensor[Float]], val imageInfo: Tensor[Float] = null)
   extends MiniBatch[Float] {
 
   override def size(): Int = input.size(1)
@@ -216,20 +280,18 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
   }
 
   override def getTarget(): Table = {
-    var i = 0
-    val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) =>
-      i += 1
+    val tables = (target, isCrowd, 1 to isCrowd.length).zipped.map { case (roiLabel, crowd, i) =>
       roiLabel.toTable
-        .update(RoiLabel.ISCROWD, crowd)
-        .update(RoiLabel.IMGINFO, imageInfo.select(1, i))
+        .update(RoiImageInfo.ISCROWD, crowd)
+        .update(RoiImageInfo.IMGINFO, imageInfo.select(1, i))
     }
     T.seq(tables)
   }
 
   override def slice(offset: Int, length: Int): MiniBatch[Float] = {
+    // offset starts from 1 (same convention as Tensor.narrow), while Array.slice takes a
+    // 0-based, end-exclusive (from, until) pair, so the end index must be start + length.
+    val start = offset - 1
     val subInput = input.narrow(1, offset, length)
-    val subTarget = target.view(offset - 1, length) // offset starts from 1
-    val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1
+    val subTarget = target.slice(start, start + length)
+    val subIsCrowd = isCrowd.slice(start, start + length)
     val subSize = imageInfo.narrow(1, offset, length)
     RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
   }
@@ -241,8 +303,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
 }
 
 object RoiMiniBatch {
-  def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
-    isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
+  def apply(data: Tensor[Float], target: Array[RoiLabel],
+    isCrowd: Array[Tensor[Float]], imageInfo: Tensor[Float] = null):
   RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
 }
 
@@ -258,31 +320,40 @@ object RoiMiniBatch {
  * @param toRGB  if converted to RGB, default format is BGR
  */
 class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
-  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true)
+  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
   extends MTImageFeatureToBatch(totalBatchSize, transformer) {
 
   private val frameLength = height * width
   private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
   private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
   private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
-  private var featureTensor: Tensor[Float] = null
+  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
+  private var featureTensor: Tensor[Float] = Tensor[Float]()
 
   override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
     img.copyTo(featureData, position * frameLength * 3, toRGB = toRGB)
-    val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
+    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
     val label = img.getLabel.asInstanceOf[RoiLabel]
     require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
       "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
     isCrowdData(position) = isCrowd
     labelData(position) = label
+    imgInfoData.setValue(position + 1, 1, img.getHeight())
+    imgInfoData.setValue(position + 1, 2, img.getWidth())
+    imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
+    imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
   }
 
-  override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
-    if (featureTensor == null) {
-      featureTensor = Tensor(Storage[Float](featureData),
-        storageOffset = 1, size = Array(batchSize, 3, height, width))
+  override protected def createBatch(curBatchSize: Int): MiniBatch[Float] = {
+    // Re-bind the tensor to the shared feature buffer only when the batch dimension changes
+    // (first call on the empty tensor, or a batch shorter than the previous one).
+    // nElement() is curBatchSize * 3 * height * width, so comparing it with curBatchSize
+    // would be true on every call and re-bind the storage each time.
+    if (featureTensor.dim() == 0 || featureTensor.size(1) != curBatchSize) {
+      featureTensor.set(Storage[Float](featureData),
+        storageOffset = 1, sizes = Array(curBatchSize, 3, height, width))
+    }
+    // Hand out only the first curBatchSize entries; the backing array is reused as-is
+    // when the batch is full.
+    def arraySlice[T](array: Array[T]) = {
+      if (array.length == curBatchSize) array else array.slice(0, curBatchSize)
     }
-    RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view)
+    RoiMiniBatch(featureTensor, arraySlice(labelData), arraySlice(isCrowdData),
+      imgInfoData.narrow(1, 1, curBatchSize))
   }
 }
 
@@ -326,7 +397,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
     imageBuffer(position).resize(3, img.getHeight(), img.getWidth())
     // save img to buffer
     img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
-    val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
+    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
     val label = img.getLabel.asInstanceOf[RoiLabel]
     if (isCrowd != null && label != null) {
       require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
@@ -345,11 +416,14 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
     if (featureTensor == null) featureTensor = Tensor()
     featureTensor.resize(batchSize, 3, height, wide).fill(0.0f)
     // copy img buffer to feature tensor
-    for (i <- 0 to (batchSize - 1)) {
+    for (i <- 0 until batchSize) {
       featureTensor.select(1, i + 1).narrow(2, 1, imageBuffer(i).size(2))
         .narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
     }
-    RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
-      isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
+    def arraySlice[T](array: Array[T]) = {
+      if (array.length == batchSize) array else array.slice(0, batchSize)
+    }
+    RoiMiniBatch(featureTensor, arraySlice(labelData),
+      arraySlice(isCrowdData), imgInfoData.narrow(1, 1, batchSize))
   }
 }
diff --git a/...a/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/...a/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
@@ -16,17 +16,17 @@
 
 package com.intel.analytics.bigdl.transform.vision.image.label.roi
 
-import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}
+import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, SegmentationMasks, RLEMasks}
 import com.intel.analytics.bigdl.tensor.Tensor
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.utils.{T, Table}
-import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}
 
 /**
  * image target with classes and bounding boxes
  *
  * @param classes N (class labels) or 2 * N, the first row is class labels,
  * the second line is difficults
- * @param bboxes N * 4
+ * @param bboxes N * 4, (xmin, ymin, xmax, ymax)
  * @param masks the array of annotation masks of the targets
  */
 case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
@@ -45,8 +45,8 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
         s"be equal to the number of mask array ${masks.length}")
     }
   } else if (classes.nElement() > 0 && classes.dim() == 2) {
-    require(classes.size(2) == bboxes.size(1), s"the number of classes ${ classes.size(2) }" +
-      s"should be equal to the number of bounding box numbers ${ bboxes.size(1) }")
+    require(classes.size(2) == bboxes.size(1), s"the number of classes ${classes.size(2)}" +
+      s"should be equal to the number of bounding box numbers ${bboxes.size(1)}")
     if (masks != null) {
       require(classes.size(2) == masks.length, s"the number of classes ${classes.size(2)}" +
         s"should be equal to the number of bounding box numbers ${masks.length}")
@@ -57,10 +57,11 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
   def toTable: Table = {
     val table = T()
     if (masks != null) {
-      table(RoiLabel.MASKS) = masks.map(_.toRLE)
+      require(masks.length > 0, "The masks can either be null or a non-empty array")
+      table(RoiImageInfo.MASKS) = masks.map(_.toRLE)
     }
-    table(RoiLabel.CLASSES) = classes
-    table(RoiLabel.BBOXES) = bboxes
+    table(RoiImageInfo.CLASSES) = classes
+    table(RoiImageInfo.BBOXES) = bboxes
     table
   }
 
@@ -70,23 +71,6 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
 }
 
 object RoiLabel {
-  val CLASSES = "classes"
-  val BBOXES = "bboxes"
-  val MASKS = "masks"
-  // ISCROWD and ORIGSIZE are stored in ImageFeature
-  val ISCROWD = "is_crowd"
-  val IMGINFO = "imgInfo"
-  val SCORES = "scores"
-
-
-  def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
-  def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
-  def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
-  def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
-  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
-  def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
-
-
   def fromTensor(tensor: Tensor[Float]): RoiLabel = {
     val label = tensor.narrow(2, 1, 2).transpose(1, 2).contiguous()
     val rois = tensor.narrow(2, 3, 4)