diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
index 0b491a63c51..c6e28d5ff59 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala
@@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.image.{LabeledBGRImage, _}
 import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCODeserializer}
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
-import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame}
+import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame, RoiImageInfo}
 import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, T}
 import java.awt.Color
 import java.awt.image.{BufferedImage, DataBufferByte}
@@ -643,7 +643,7 @@ object DataSet {
           require(rawdata.length == height * width * 3)
           val imf = ImageFeature(rawdata, RoiLabel(labelClasses, bboxes, masks), fileName)
           imf(ImageFeature.originalSize) = (height, width, 3)
-          imf(RoiLabel.ISCROWD) = isCrowd
+          imf(RoiImageInfo.ISCROWD) = isCrowd
           imf
         }
         .coalesce(num)
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala
index 39264a64de3..e7773a6e69f 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala
@@ -22,9 +22,14 @@ import scala.collection.mutable.ArrayBuffer
 
 abstract class SegmentationMasks extends Serializable {
   /**
-   * Convert to a RLE encoded tensor
+   * Convert to RLE encoded masks
    */
   def toRLE: RLEMasks
+
+  /**
+   * Get the height and width
+   */
+  def size: (Int, Int)
 }
 
 /**
@@ -40,6 +45,11 @@ class PolyMasks(val poly: Array[Array[Float]], val height: Int, val width: Int)
     require(height > 0 && width > 0, "the height and width must > 0 for toRLE")
     MaskUtils.mergeRLEs(MaskUtils.poly2RLE(this, height, width), false)
   }
+
+  /**
+   * Get the height and width
+   */
+  override def size: (Int, Int) = (height, width)
 }
 
 object PolyMasks {
@@ -69,6 +79,8 @@ class RLEMasks(val counts: Array[Int], val height: Int, val width: Int)
   extends SegmentationMasks {
   override def toRLE: RLEMasks = this
 
+  override def size: (Int, Int) = (height, width)
+
   // cached bbox value
   @transient
   lazy val bbox: (Float, Float, Float, Float) = MaskUtils.rleToOneBbox(this)
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
index a88741aa6cb..d8fb0476613 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala
@@ -15,8 +15,8 @@
  */
 package com.intel.analytics.bigdl.transform.vision.image
 
+import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
 import java.util.concurrent.atomic.AtomicInteger
-
 import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils}
 import com.intel.analytics.bigdl.nn.abstractnn.Activity
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
@@ -24,9 +24,6 @@ import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.{Engine, T, Table}
 
-import scala.collection.mutable.IndexedSeq
-import scala.reflect.ClassTag
-
 object MTImageFeatureToBatch {
   /**
    * The transformer from ImageFeature to mini-batches
@@ -40,7 +37,7 @@ object MTImageFeatureToBatch {
    * @return
    */
   def apply(width: Int, height: Int, batchSize: Int,
-            transformer: FeatureTransformer, toRGB: Boolean = true, extractRoi: Boolean = false)
+            transformer: FeatureTransformer, toRGB: Boolean = false, extractRoi: Boolean = false)
   : MTImageFeatureToBatch = {
     if (extractRoi) {
       new RoiMTImageFeatureToBatch (
@@ -162,7 +159,7 @@ private class PreFetch extends Transformer[ImageFeature, ImageFeature] {
  * @param toRGB  if converted to RGB, default format is BGR
  */
 class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
-  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true)
+  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
   extends MTImageFeatureToBatch(totalBatchSize, transformer) {
 
   private val frameLength = height * width
@@ -188,6 +185,73 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
   }
 }
 
+
+object RoiImageInfo {
+  // the keys in the target table
+  // fields from RoiLabel
+  val CLASSES = "classes"
+  val BBOXES = "bboxes"
+  val MASKS = "masks"
+  // ISCROWD and ORIGSIZE are stored in ImageFeature
+  val ISCROWD = "is_crowd"
+  val ORIGSIZE = "orig_size"
+  val SCORES = "scores"
+  val IMGINFO = "imginfo"
+
+  /**
+   * Get the output score tensor from the table.
+   *    (1 x N) tensor for N detections
+   *
+   * @param tab the table to read the SCORES key from
+   * @return the tensor stored under the SCORES key
+   */
+  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
+
+  /**
+   * Get the class label tensor from the table. See RoiLabel.classes
+   *    the categories for each detection (see RoiLabel.classes field)
+   *    (1 x N), or (2 x N) Tensor[Float]
+   *
+   * @param tab the table to read the CLASSES key from
+   * @return the tensor stored under the CLASSES key
+   */
+  def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
+
+  /**
+   * Get the bbox tensor from the table. See RoiLabel.bboxes
+   * @param tab the table to read the BBOXES key from
+   * @return the tensor stored under the BBOXES key
+   */
+  def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
+
+  /**
+   * Get the (optional) mask data from the table. See RoiLabel.masks
+   * @param tab the table to read the MASKS key from
+   * @return the RLE masks stored under the MASKS key
+   */
+  def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
+
+  /**
+   * Get the isCrowd tensor from the table. Should be 1 x N vector (N is the # of detections)
+   * @param tab the table to read the ISCROWD key from
+   * @return the tensor stored under the ISCROWD key
+   */
+  def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
+
+  /**
+   * Get the size of the image before resizing, read from the ORIGSIZE key of the table
+   * @return (height, width, channel)
+   */
+  def getOrigSize(tab: Table): (Int, Int, Int) = tab[(Int, Int, Int)](ORIGSIZE)
+
+  /**
+   * Get the image info tensor from the table: (height, width, original height, original width)
+   * @param tab the table to read the IMGINFO key from
+   * @return the tensor stored under the IMGINFO key
+   */
+  def getImageInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
+
+}
 /**
  * A batch of images with flattened RoiLabels
  * the getTarget() returns a Table with key from 1 to batchSize. Each key in the table is mapped to
@@ -195,18 +259,18 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int
  * info for one image (assume the image has N detections). The annotation table has
  *
  * Key                Value
- * RoiLabel.CLASSES   the categories for each detections (see RoiLabel.clasees field)
+ * RoiImageInfo.CLASSES   the categories for each detection (see RoiLabel.classes field)
  *                    (1 x N), or (2 x N) Tensor[Float]
- * RoiLabel.BBOXES    the bboxes, (N x 4) Tensor[Float]
- * RoiLabel.MASKS     (Optional) the mask data, Array[Tensor[Float]\]. The outer array has N
+ * RoiImageInfo.BBOXES    the bboxes, (N x 4) Tensor[Float]
+ * RoiImageInfo.MASKS     (Optional) the mask data, Array[Tensor[Float]\]. The outer array has N
  *                    elements. The inner tensor holds the data for segmentation
- * RoiLabel.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
+ * RoiImageInfo.ISCROWD   Whether each detection is crowd. (1 x N) Tensor[Float].
  *                    -1: unknown, 0: not crowd, 1: is crowd
- * RoiLabel.ImageInfo  with shape (batchSize, 4), contains all images info
+ * RoiImageInfo.IMGINFO  one row of imageInfo (shape (batchSize, 4)): a 4-element vector
  *                 (height, width, original height, original width)
  */
-class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
-  val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null)
+class RoiMiniBatch(val input: Tensor[Float], val target: Array[RoiLabel],
+  val isCrowd: Array[Tensor[Float]], val imageInfo: Tensor[Float] = null)
   extends MiniBatch[Float] {
 
   override def size(): Int = input.size(1)
@@ -216,20 +280,18 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
   }
 
   override def getTarget(): Table = {
-    var i = 0
-    val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) =>
-      i += 1
+    val tables = (target, isCrowd, 1 to isCrowd.length).zipped.map { case (roiLabel, crowd, i) =>
       roiLabel.toTable
-        .update(RoiLabel.ISCROWD, crowd)
-        .update(RoiLabel.IMGINFO, imageInfo.select(1, i))
+        .update(RoiImageInfo.ISCROWD, crowd)
+        .update(RoiImageInfo.IMGINFO, imageInfo.select(1, i))
     }
     T.seq(tables)
   }
 
   override def slice(offset: Int, length: Int): MiniBatch[Float] = {
     val subInput = input.narrow(1, offset, length)
-    val subTarget = target.view(offset - 1, length) // offset starts from 1
-    val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1
+    val subTarget = target.slice(offset - 1, offset - 1 + length) // (from, until), 0-based
+    val subIsCrowd = isCrowd.slice(offset - 1, offset - 1 + length) // (from, until), 0-based
     val subSize = imageInfo.narrow(1, offset, length)
     RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize)
   }
@@ -241,8 +303,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel],
 }
 
 object RoiMiniBatch {
-  def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel],
-    isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null):
+  def apply(data: Tensor[Float], target: Array[RoiLabel],
+    isCrowd: Array[Tensor[Float]], imageInfo: Tensor[Float] = null):
   RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo)
 }
 
@@ -258,31 +320,40 @@ object RoiMiniBatch {
  * @param toRGB  if converted to RGB, default format is BGR
  */
 class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int,
-  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true)
+  totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false)
   extends MTImageFeatureToBatch(totalBatchSize, transformer) {
 
   private val frameLength = height * width
   private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3)
   private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize)
   private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize)
-  private var featureTensor: Tensor[Float] = null
+  private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4)
+  private var featureTensor: Tensor[Float] = Tensor[Float]()
 
   override protected def processImageFeature(img: ImageFeature, position: Int): Unit = {
     img.copyTo(featureData, position * frameLength * 3, toRGB = toRGB)
-    val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
+    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
     val label = img.getLabel.asInstanceOf[RoiLabel]
     require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
       "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel")
     isCrowdData(position) = isCrowd
     labelData(position) = label
+    imgInfoData.setValue(position + 1, 1, img.getHeight())
+    imgInfoData.setValue(position + 1, 2, img.getWidth())
+    imgInfoData.setValue(position + 1, 3, img.getOriginalHeight)
+    imgInfoData.setValue(position + 1, 4, img.getOriginalWidth)
   }
 
-  override protected def createBatch(batchSize: Int): MiniBatch[Float] = {
-    if (featureTensor == null) {
-      featureTensor = Tensor(Storage[Float](featureData),
-        storageOffset = 1, size = Array(batchSize, 3, height, width))
+  override protected def createBatch(curBatchSize: Int): MiniBatch[Float] = {
+    if (featureTensor.nElement() != curBatchSize * frameLength * 3) { // only on size change
+      featureTensor.set(Storage[Float](featureData),
+        storageOffset = 1, sizes = Array(curBatchSize, 3, height, width))
+    }
+    def arraySlice[T](array: Array[T]) = {
+      if (array.length == curBatchSize) array else array.slice(0, curBatchSize)
     }
-    RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view)
+    RoiMiniBatch(featureTensor, arraySlice(labelData), arraySlice(isCrowdData),
+      imgInfoData.narrow(1, 1, curBatchSize))
   }
 }
 
@@ -326,7 +397,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
     imageBuffer(position).resize(3, img.getHeight(), img.getWidth())
     // save img to buffer
     img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB)
-    val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]]
+    val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]]
     val label = img.getLabel.asInstanceOf[RoiLabel]
     if (isCrowd != null && label != null) {
       require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" +
@@ -345,11 +416,14 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t
     if (featureTensor == null) featureTensor = Tensor()
     featureTensor.resize(batchSize, 3, height, wide).fill(0.0f)
     // copy img buffer to feature tensor
-    for (i <- 0 to (batchSize - 1)) {
+    for (i <- 0 until batchSize) {
       featureTensor.select(1, i + 1).narrow(2, 1, imageBuffer(i).size(2))
         .narrow(3, 1, imageBuffer(i).size(3)).copy(imageBuffer(i))
     }
-    RoiMiniBatch(featureTensor, labelData.view(0, batchSize),
-      isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize))
+    def arraySlice[T](array: Array[T]) = {
+      if (array.length == batchSize) array else array.slice(0, batchSize)
+    }
+    RoiMiniBatch(featureTensor, arraySlice(labelData),
+      arraySlice(isCrowdData), imgInfoData.narrow(1, 1, batchSize))
   }
 }
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
index 9bfd21a9c51..cb038780450 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala
@@ -16,17 +16,17 @@
 
 package com.intel.analytics.bigdl.transform.vision.image.label.roi
 
-import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}
+import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, SegmentationMasks, RLEMasks}
 import com.intel.analytics.bigdl.tensor.Tensor
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.utils.{T, Table}
-import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks}
 
 /**
  * image target with classes and bounding boxes
  *
  * @param classes N (class labels) or 2 * N, the first row is class labels,
  * the second line is difficults
- * @param bboxes N * 4
+ * @param bboxes N * 4, (xmin, ymin, xmax, ymax)
  * @param masks the array of annotation masks of the targets
  */
 case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
@@ -45,8 +45,8 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
         s"be equal to the number of mask array ${masks.length}")
     }
   } else if (classes.nElement() > 0 && classes.dim() == 2) {
-    require(classes.size(2) == bboxes.size(1), s"the number of classes ${ classes.size(2) }" +
-      s"should be equal to the number of bounding box numbers ${ bboxes.size(1) }")
+    require(classes.size(2) == bboxes.size(1), s"the number of classes ${classes.size(2)}" +
+      s"should be equal to the number of bounding box numbers ${bboxes.size(1)}")
     if (masks != null) {
       require(classes.size(2) == masks.length, s"the number of classes ${classes.size(2)}" +
         s"should be equal to the number of bounding box numbers ${masks.length}")
@@ -57,10 +57,11 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
   def toTable: Table = {
     val table = T()
     if (masks != null) {
-      table(RoiLabel.MASKS) = masks.map(_.toRLE)
+      require(masks.length > 0, "The masks can either be null or a non-empty array")
+      table(RoiImageInfo.MASKS) = masks.map(_.toRLE)
     }
-    table(RoiLabel.CLASSES) = classes
-    table(RoiLabel.BBOXES) = bboxes
+    table(RoiImageInfo.CLASSES) = classes
+    table(RoiImageInfo.BBOXES) = bboxes
     table
   }
 
@@ -70,23 +71,6 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float],
 }
 
 object RoiLabel {
-  val CLASSES = "classes"
-  val BBOXES = "bboxes"
-  val MASKS = "masks"
-  // ISCROWD and ORIGSIZE are stored in ImageFeature
-  val ISCROWD = "is_crowd"
-  val IMGINFO = "imgInfo"
-  val SCORES = "scores"
-
-
-  def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES)
-  def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES)
-  def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS)
-  def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD)
-  def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES)
-  def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO)
-
-
   def fromTensor(tensor: Tensor[Float]): RoiLabel = {
     val label = tensor.narrow(2, 1, 2).transpose(1, 2).contiguous()
     val rois = tensor.narrow(2, 3, 4)
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
index 4d4f1d59fd2..34bf3aa7cf4 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala
@@ -23,12 +23,12 @@ import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity}
 import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule}
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.transform.vision.image.util.BboxUtil
 import com.intel.analytics.bigdl.utils.serializer._
 import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter
 import com.intel.analytics.bigdl.utils.{T, Table}
-
 import scala.reflect.ClassTag
 import scala.reflect.runtime._
 
@@ -243,10 +243,10 @@ class MaskRCNN(val inChannels: Int,
         }
         start += boxNumber
 
-        postOutput.update(RoiLabel.MASKS, masksRLE)
-        postOutput.update(RoiLabel.BBOXES, bboxPerImg)
-        postOutput.update(RoiLabel.CLASSES, classPerImg)
-        postOutput.update(RoiLabel.SCORES, scorePerImg)
+        postOutput.update(RoiImageInfo.MASKS, masksRLE)
+        postOutput.update(RoiImageInfo.BBOXES, bboxPerImg)
+        postOutput.update(RoiImageInfo.CLASSES, classPerImg)
+        postOutput.update(RoiImageInfo.SCORES, scorePerImg)
       }
 
       output(i + 1) = postOutput
diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala
index 286b05bcfba..f4d344c1e81 100644
--- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala
@@ -23,6 +23,7 @@ import com.intel.analytics.bigdl.nn.AbsCriterion
 import com.intel.analytics.bigdl.nn.abstractnn.Activity
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.Table
 import org.apache.commons.lang3.SerializationUtils
@@ -313,10 +314,10 @@ object MAPUtil {
       val gtImage = new ArrayBuffer[GroundTruthRegion]()
       val roiLabel = gtTable[Table](i)
       if (roiLabel.length() > 0) {
-        val bbox = RoiLabel.getBBoxes(roiLabel)
-        val tclasses = RoiLabel.getClasses(roiLabel)
-        val isCrowd = RoiLabel.getIsCrowd(roiLabel)
-        val masks = if (isSegmentation) RoiLabel.getMasks(roiLabel) else null
+        val bbox = RoiImageInfo.getBBoxes(roiLabel)
+        val tclasses = RoiImageInfo.getClasses(roiLabel)
+        val isCrowd = RoiImageInfo.getIsCrowd(roiLabel)
+        val masks = if (isSegmentation) RoiImageInfo.getMasks(roiLabel) else null
         val bboxCnt = bbox.size(1)
         require(bboxCnt == tclasses.size(1), "CLASSES of target tables should have the" +
           "same size of the bbox counts")
@@ -705,11 +706,11 @@ class MeanAveragePrecisionObjectDetection[T: ClassTag](
           val imgOut = outTable[Table](imgId)
           // if the image contains empty predictions, do nothing
           if (imgOut.length() > 0) {
-            val bboxes = RoiLabel.getBBoxes(imgOut)
-            val scores = RoiLabel.getScores(imgOut)
-            val labels = RoiLabel.getClasses(imgOut)
+            val bboxes = RoiImageInfo.getBBoxes(imgOut)
+            val scores = RoiImageInfo.getScores(imgOut)
+            val labels = RoiImageInfo.getClasses(imgOut)
             require(bboxes.dim() == 2, "the bbox tensor should have 2 dimensions")
-            val masks = if (isSegmentation) Some(RoiLabel.getMasks(imgOut)) else None
+            val masks = if (isSegmentation) Some(RoiImageInfo.getMasks(imgOut)) else None
             val batchSize = bboxes.size(1)
             require(batchSize == labels.size(1), "CLASSES of target tables should have the" +
               "same size of the bbox counts")
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala
index 71413128b7c..f05ae64decd 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala
@@ -23,7 +23,7 @@ import com.intel.analytics.bigdl.dataset.image._
 import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCOPoly, COCORLE, PolyMasks, RLEMasks}
 import com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator
 import com.intel.analytics.bigdl.tensor.Tensor
-import com.intel.analytics.bigdl.transform.vision.image.ImageFeature
+import com.intel.analytics.bigdl.transform.vision.image.{ImageFeature, RoiImageInfo}
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, SparkContextLifeCycle, TestUtils}
 import java.awt.image.DataBufferByte
@@ -99,7 +99,7 @@ class DataSetSpec extends SparkContextLifeCycle with Matchers {
       .data(false)
       .map(imf => {
         (imf(ImageFeature.uri).asInstanceOf[String], imf.getOriginalSize, imf.getLabel[RoiLabel],
-          imf[Tensor[Float]](RoiLabel.ISCROWD), imf[Array[Byte]](ImageFeature.bytes))
+          imf[Tensor[Float]](RoiImageInfo.ISCROWD), imf[Array[Byte]](ImageFeature.bytes))
       })
       .collect()
       .foreach({ case (uri, size, label, iscrowd, bytes) =>
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala
index 276962d6178..0a8f147eb73 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala
@@ -20,6 +20,7 @@ import com.intel.analytics.bigdl.Module
 import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
 import com.intel.analytics.bigdl.nn.Nms
 import com.intel.analytics.bigdl.tensor.Tensor
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest
 import com.intel.analytics.bigdl.utils.{RandomGenerator, T, Table}
@@ -63,29 +64,29 @@ class MaskRCNNSpec extends FlatSpec with Matchers {
     val first = output[Table](1)
     val second = output[Table](2)
 
-    first.get[Tensor[Float]](RoiLabel.BBOXES) should be(
-      output1.get[Tensor[Float]](RoiLabel.BBOXES))
-    first.get[Tensor[Float]](RoiLabel.CLASSES) should be(
-      output1.get[Tensor[Float]](RoiLabel.CLASSES))
-    first.get[Tensor[Float]](RoiLabel.SCORES) should be(
-      output1.get[Tensor[Float]](RoiLabel.SCORES))
+    first.get[Tensor[Float]](RoiImageInfo.BBOXES) should be(
+      output1.get[Tensor[Float]](RoiImageInfo.BBOXES))
+    first.get[Tensor[Float]](RoiImageInfo.CLASSES) should be(
+      output1.get[Tensor[Float]](RoiImageInfo.CLASSES))
+    first.get[Tensor[Float]](RoiImageInfo.SCORES) should be(
+      output1.get[Tensor[Float]](RoiImageInfo.SCORES))
 
-    second.get[Tensor[Float]](RoiLabel.BBOXES) should be(
-      output2.get[Tensor[Float]](RoiLabel.BBOXES))
-    second.get[Tensor[Float]](RoiLabel.CLASSES) should be(
-      output2.get[Tensor[Float]](RoiLabel.CLASSES))
-    second.get[Tensor[Float]](RoiLabel.SCORES) should be(
-      output2.get[Tensor[Float]](RoiLabel.SCORES))
+    second.get[Tensor[Float]](RoiImageInfo.BBOXES) should be(
+      output2.get[Tensor[Float]](RoiImageInfo.BBOXES))
+    second.get[Tensor[Float]](RoiImageInfo.CLASSES) should be(
+      output2.get[Tensor[Float]](RoiImageInfo.CLASSES))
+    second.get[Tensor[Float]](RoiImageInfo.SCORES) should be(
+      output2.get[Tensor[Float]](RoiImageInfo.SCORES))
 
     // for masks
-    val firstMasks = first.get[Array[RLEMasks]](RoiLabel.MASKS).get
-    val expectedMasks = output1.get[Array[RLEMasks]](RoiLabel.MASKS).get
+    val firstMasks = first.get[Array[RLEMasks]](RoiImageInfo.MASKS).get
+    val expectedMasks = output1.get[Array[RLEMasks]](RoiImageInfo.MASKS).get
     for (i <- 0 to firstMasks.length - 1) {
       firstMasks(i).counts should be(expectedMasks(i).counts)
     }
 
-    val secondMasks = second.get[Array[RLEMasks]](RoiLabel.MASKS).get
-    val expectedMasks2 = output2.get[Array[RLEMasks]](RoiLabel.MASKS).get
+    val secondMasks = second.get[Array[RLEMasks]](RoiImageInfo.MASKS).get
+    val expectedMasks2 = output2.get[Array[RLEMasks]](RoiImageInfo.MASKS).get
 
     for (i <- 0 to secondMasks.length - 1) {
       secondMasks(i).counts should be(expectedMasks2(i).counts)
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala
index 37423674a01..aac1b0cd141 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala
@@ -18,6 +18,7 @@ package com.intel.analytics.bigdl.optim
 
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 import com.intel.analytics.bigdl.tensor.{Storage, Tensor}
+import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.T
 import org.scalatest.{FlatSpec, Matchers}
@@ -247,9 +248,9 @@ class ValidationSpec extends FlatSpec with Matchers {
 
     val target = T(
         T()
-          .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)))
-          .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0, 1, 1, 1, 1, 1)))
-          .update(RoiLabel.BBOXES, Tensor[Float](T(
+          .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0, 0, 0, 0, 0, 0)))
+          .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0, 1, 1, 1, 1, 1)))
+          .update(RoiImageInfo.BBOXES, Tensor[Float](T(
               T(100, 100, 200, 200),
               T(300, 100, 400, 200),
               T(100, 300, 200, 400),
@@ -271,8 +272,8 @@ class ValidationSpec extends FlatSpec with Matchers {
 
     val outputTable = T(
       T()
-        .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 1, 1, 1, 1)))
-        .update(RoiLabel.BBOXES, Tensor[Float](T(
+        .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 1, 1, 1, 1)))
+        .update(RoiImageInfo.BBOXES, Tensor[Float](T(
           T(110, 90, 210, 190),
           T(310, 110, 410, 210),
           T(320, 290, 420, 390),
@@ -283,7 +284,7 @@ class ValidationSpec extends FlatSpec with Matchers {
           T(1210, 1310, 1290, 1410)
         ))
         )
-        .update(RoiLabel.SCORES, Tensor[Float](T(1, 2, 4, 3, 1, 3, 4, 2)))
+        .update(RoiImageInfo.SCORES, Tensor[Float](T(1, 2, 4, 3, 1, 3, 4, 2)))
     )
     val v2 = new MeanAveragePrecisionObjectDetection(3)
     val result2 = v2(outputTable, target)
@@ -294,9 +295,9 @@ class ValidationSpec extends FlatSpec with Matchers {
   "MeanAveragePrecisionObjectDetection" should "be correct on empty detections" in {
     val target = T(
       T()
-        .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0)))
-        .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0)))
-        .update(RoiLabel.BBOXES, Tensor[Float](T(
+        .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0)))
+        .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0)))
+        .update(RoiImageInfo.BBOXES, Tensor[Float](T(
           T(100, 100, 200, 200),
           T(300, 100, 400, 200),
           T(100, 300, 200, 400),
@@ -314,9 +315,9 @@ class ValidationSpec extends FlatSpec with Matchers {
   "MeanAveragePrecisionObjectDetection" should "be correct on empty targets" in {
     val target = T(
       T()
-        .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0)))
-        .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0)))
-        .update(RoiLabel.BBOXES, Tensor[Float](T(
+        .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0)))
+        .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0)))
+        .update(RoiImageInfo.BBOXES, Tensor[Float](T(
           T(100, 100, 200, 200),
           T(300, 100, 400, 200),
           T(100, 300, 200, 400),
@@ -329,25 +330,25 @@ class ValidationSpec extends FlatSpec with Matchers {
     )
     val outputTable = T(
       T()
-        .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0)))
-        .update(RoiLabel.BBOXES, Tensor[Float](T(
+        .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0)))
+        .update(RoiImageInfo.BBOXES, Tensor[Float](T(
           T(110, 90, 210, 190),
           T(310, 110, 410, 210),
           T(320, 290, 420, 390),
           T(210, 310, 290, 410)
         ))
         )
-        .update(RoiLabel.SCORES, Tensor[Float](T(1, 2, 9, 7))),
+        .update(RoiImageInfo.SCORES, Tensor[Float](T(1, 2, 9, 7))),
       T()
-        .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0)))
-        .update(RoiLabel.BBOXES, Tensor[Float](T(
+        .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0)))
+        .update(RoiImageInfo.BBOXES, Tensor[Float](T(
           T(1110, 1090, 1210, 1190),
           T(1310, 1110, 1410, 1210),
           T(1320, 1290, 1420, 1390),
           T(1210, 1310, 1290, 1410)
         ))
         )
-        .update(RoiLabel.SCORES, Tensor[Float](T(0, 5, 4, 8)))
+        .update(RoiImageInfo.SCORES, Tensor[Float](T(0, 5, 4, 8)))
     )
     val v = new MeanAveragePrecisionObjectDetection[Float](3)
     val result = v(outputTable, target)
diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala
index 191d0df86cc..e8e5c77e232 100644
--- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala
+++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala
@@ -17,6 +17,7 @@
 package com.intel.analytics.bigdl.transform.vision.image
 
 import com.intel.analytics.bigdl.dataset.DataSet
+import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel
 import com.intel.analytics.bigdl.utils.{Engine, T, Table}
@@ -97,7 +98,7 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
           Tensor(new Array[Float](2*4), Array(2, 4)),
           null
         )
-        imf(RoiLabel.ISCROWD) = Tensor(Array(0f, 1f), Array(2))
+        imf(RoiImageInfo.ISCROWD) = Tensor(Array(0f, 1f), Array(2))
         imf(ImageFeature.originalSize) = (8, 16, 3)
         imf
       }).toArray
@@ -179,18 +180,26 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
         val in = input.select(1, i)
         in should be(expectedOutput)
         val t = target(i).asInstanceOf[Table]
-        t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2)))
-        // t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3))
-        t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4))
-        t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2))
+        t[Tensor[Float]](RoiImageInfo.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2)))
+        // t[(Int, Int, Int)](RoiImageInfo.ORIGSIZE) should be((8, 16, 3))
+        t[Tensor[Float]](RoiImageInfo.BBOXES).size() should be (Array(2, 4))
+        t[Tensor[Float]](RoiImageInfo.CLASSES).size() should be (Array(2))
       }
     })
   }
 
-  // todo: There is a race-condition bug in MTImageFeatureToBatch
-  /*
+  "RoiMiniBatch" should "serialize well" in {
+    // `def`, not `val`: every reference below constructs a separate RoiMiniBatch.
+    def batch: RoiMiniBatch = RoiMiniBatch(Tensor[Float](),
+      Array[RoiLabel](RoiLabel(Tensor[Float](), Tensor[Float]())),
+      Array[Tensor[Float]](Tensor[Float]()), Tensor())
+    // The shuffling coalesce moves batches across partitions before sampling.
+    val sampled = sc.parallelize(Seq.fill(5)(batch), 3).coalesce(2, shuffle = true)
+      .takeSample(withReplacement = false, num = 3)
+    sampled.length should be (3)
+  }
+
   "MTImageFeatureToBatch classification" should "work well" in {
-    //
     val imgData = (0 to 1000).map(idx => (idx to (idx + 10*10*3)).map(_.toFloat).toArray)
       .map(arr => {
         val imf = ImageFeature()
@@ -199,13 +208,11 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
         imf(ImageFeature.label) = lab
         imf(ImageFeature.originalSize) = (10, 10, 3)
         imf
-      })
+      }).toArray
     val transformer = MTImageFeatureToBatch(10, 10, 19, new FeatureTransformer {}, toRGB = false)
-    val miniBatch = transformer(imgData.toIterator)
-    // val imgCheck = new Array[Boolean](1000)
+    val miniBatch = transformer(DataSet.array(imgData).data(false))
+    val imgCheck = new Array[Boolean](1001)
     miniBatch
-      .take(5)
-      // .take(1000 / 19)
       .foreach(batch => {
       (batch.size() <= 19) should be (true)
       val input = batch.getInput().asInstanceOf[Tensor[Float]]
@@ -221,16 +228,16 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
         B should be (G + 1)
         input.valueAt(i, 3, 10, 10) should be((idx.toFloat + 10 * 10 * 3 - 1) +- 0.000001f)
         target.valueAt(i) should be (idx.toFloat)
-        /* imgCheck(idx) should be (false)
-        imgCheck(idx) = true */
+        imgCheck(idx) should be (false)
+        imgCheck(idx) = true
       }
-
     })
+    imgCheck.count(!_) should be (0)
 
   }
 
   "MTImageFeatureToBatch with ROI" should "work well" in {
-    //
+    val imgCheck = new Array[Boolean](1001)
     val imgData = (0 to 1000).map(idx => (idx to (idx + 10*10*3)).map(_.toFloat).toArray)
       .map(arr => {
         val imf = ImageFeature()
@@ -238,34 +245,38 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft
         imf(ImageFeature.label) = RoiLabel(
           Tensor(new Array[Float](2), Array(2)),
           Tensor(new Array[Float](2*4), Array(2, 4)),
-          Array(Tensor[Float](), Tensor[Float]())
+          Array(new RLEMasks(Array(), 10, 10),
+            new RLEMasks(Array(), 10, 10)
+          )
         )
-        imf(RoiLabel.ISCROWD) = Tensor(Array(0f, 1f), Array(2))
+        imf(RoiImageInfo.ISCROWD) = Tensor(Array(0f, 1f), Array(2))
         imf(ImageFeature.originalSize) = (10, 10, 3)
         imf
-      })
+      }).toArray
     val transformer = MTImageFeatureToBatch(10, 10, 19, new FeatureTransformer {},
       toRGB = false, extractRoi = true)
-    val miniBatch = transformer(imgData.toIterator)
-    // val imgCheck = new Array[Boolean](1000)
+    val miniBatch = transformer(DataSet.array(imgData).data(false))
     miniBatch
-      .take(5)
-      // .take(1000 / 19)
       .foreach(batch => {
       (batch.size() <= 19) should be (true)
       val target = batch.getTarget().asInstanceOf[Table]
       target.length() should be (batch.size())
       for(i <- 1 to batch.size()) {
         val t = target(i).asInstanceOf[Table]
-        t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2)))
-        t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((10, 10, 3))
-        t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4))
-        t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2))
-        t[Array[Tensor[Float]]](RoiLabel.MASKS).length should be (2)
+        RoiImageInfo.getIsCrowd(t) should be (Tensor(Array(0f, 1f), Array(2)))
+        RoiImageInfo.getImageInfo(t).size() should be(Array(4))
+        RoiImageInfo.getBBoxes(t).size() should be (Array(2, 4))
+        RoiImageInfo.getClasses(t).size() should be (Array(2))
+        RoiImageInfo.getMasks(t).length should be (2)
+        val idx = batch.getInput().asInstanceOf[Table].apply[Tensor[Float]](1)
+          .valueAt(i, 1, 1, 1).toInt
+        imgCheck(idx) should be (false)
+        imgCheck(idx) = true
       }
 
     })
+    imgCheck.count(!_) should be (0)
 
-  } */
+  }
 
 }