diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala index 0b491a63c51..c6e28d5ff59 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/DataSet.scala @@ -24,7 +24,7 @@ import com.intel.analytics.bigdl.dataset.image.{LabeledBGRImage, _} import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCODeserializer} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel -import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame} +import com.intel.analytics.bigdl.transform.vision.image.{DistributedImageFrame, ImageFeature, ImageFrame, LocalImageFrame, RoiImageInfo} import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, T} import java.awt.Color import java.awt.image.{BufferedImage, DataBufferByte} @@ -643,7 +643,7 @@ object DataSet { require(rawdata.length == height * width * 3) val imf = ImageFeature(rawdata, RoiLabel(labelClasses, bboxes, masks), fileName) imf(ImageFeature.originalSize) = (height, width, 3) - imf(RoiLabel.ISCROWD) = isCrowd + imf(RoiImageInfo.ISCROWD) = isCrowd imf } .coalesce(num) diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala index 39264a64de3..e7773a6e69f 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/dataset/segmentation/MaskUtils.scala @@ -22,9 +22,14 @@ import scala.collection.mutable.ArrayBuffer abstract class 
SegmentationMasks extends Serializable { /** - * Convert to a RLE encoded tensor + * Convert to a RLE encoded masks */ def toRLE: RLEMasks + + /** + * Get the height and width + */ + def size: (Int, Int) } /** @@ -40,6 +45,11 @@ class PolyMasks(val poly: Array[Array[Float]], val height: Int, val width: Int) require(height > 0 && width > 0, "the height and width must > 0 for toRLE") MaskUtils.mergeRLEs(MaskUtils.poly2RLE(this, height, width), false) } + + /** + * Get the height and width + */ + override def size: (Int, Int) = (height, width) } object PolyMasks { @@ -69,6 +79,8 @@ class RLEMasks(val counts: Array[Int], val height: Int, val width: Int) extends SegmentationMasks { override def toRLE: RLEMasks = this + override def size: (Int, Int) = (height, width) + // cached bbox value @transient lazy val bbox: (Float, Float, Float, Float) = MaskUtils.rleToOneBbox(this) diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala index a88741aa6cb..d8fb0476613 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/MTImageFeatureToBatch.scala @@ -15,8 +15,8 @@ */ package com.intel.analytics.bigdl.transform.vision.image +import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks import java.util.concurrent.atomic.AtomicInteger - import com.intel.analytics.bigdl.dataset.{MiniBatch, Sample, Transformer, Utils} import com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric @@ -24,9 +24,6 @@ import com.intel.analytics.bigdl.tensor.{Storage, Tensor} import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import 
com.intel.analytics.bigdl.utils.{Engine, T, Table} -import scala.collection.mutable.IndexedSeq -import scala.reflect.ClassTag - object MTImageFeatureToBatch { /** * The transformer from ImageFeature to mini-batches @@ -40,7 +37,7 @@ object MTImageFeatureToBatch { * @return */ def apply(width: Int, height: Int, batchSize: Int, - transformer: FeatureTransformer, toRGB: Boolean = true, extractRoi: Boolean = false) + transformer: FeatureTransformer, toRGB: Boolean = false, extractRoi: Boolean = false) : MTImageFeatureToBatch = { if (extractRoi) { new RoiMTImageFeatureToBatch ( @@ -162,7 +159,7 @@ private class PreFetch extends Transformer[ImageFeature, ImageFeature] { * @param toRGB if converted to RGB, default format is BGR */ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int, - totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true) + totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false) extends MTImageFeatureToBatch(totalBatchSize, transformer) { private val frameLength = height * width @@ -188,6 +185,73 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int } } + +object RoiImageInfo { + // the keys in the target table + // fields from RoiLabel + val CLASSES = "classes" + val BBOXES = "bboxes" + val MASKS = "masks" + // ISCROWD and ORIGSIZE are stored in ImageFeature + val ISCROWD = "is_crowd" + val ORIGSIZE = "orig_size" + val SCORES = "scores" + val IMGINFO = "imginfo" + + /** + * Get the output score tensor from the table. + * (1 x N) tensor for N detections + * + * @param tab + * @return + */ + def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES) + + /** + * Get the class label tensor from the table. 
See RoiLabel.classes + * the categories for each detection (see RoiLabel.classes field) + * (1 x N), or (2 x N) Tensor[Float] + * + * @param tab + * @return + */ + def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES) + + /** + * Get the bbox tensor from the table. See RoiLabel.bboxes + * @param tab + * @return + */ + def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES) + + /** + * Get the (optional) mask data from the table. See RoiLabel.masks + * @param tab + * @return + */ + def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS) + + /** + * Get the isCrowd tensor from the table. Should be 1 x N vector (N is the # of detections) + * @param tab + * @return + */ + def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD) + + /** + * Get the size of the image before resizing + * @return (height, width, channel) + */ + def getOrigSize(tab: Table): (Int, Int, Int) = tab[(Int, Int, Int)](ORIGSIZE) + + /** + * Get the image info tensor from the table: (height, width, original height, original width) + * @param tab + * @return + */ + def getImageInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO) + +} /** * A batch of images with flattened RoiLabels * the getTarget() returns a Table with key from 1 to batchSize. Each key in the table is mapped to @@ -195,18 +259,18 @@ class ClassificationMTImageFeatureToBatch private[bigdl](width: Int, height: Int * info for one image (assume the image has N detections). The annotation table has * * Key Value - * RoiLabel.CLASSES the categories for each detections (see RoiLabel.clasees field) + * RoiImageInfo.CLASSES the categories for each detection (see RoiLabel.classes field) * (1 x N), or (2 x N) Tensor[Float] - * RoiLabel.BBOXES the bboxes, (N x 4) Tensor[Float] - * RoiLabel.MASKS (Optional) the mask data, Array[Tensor[Float]\].
The outer array has N + * RoiImageInfo.BBOXES the bboxes, (N x 4) Tensor[Float] + * RoiImageInfo.MASKS (Optional) the mask data, Array[Tensor[Float]\]. The outer array has N * elements. The inner tensor holds the data for segmentation - * RoiLabel.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float]. + * RoiImageInfo.ISCROWD Whether each detection is crowd. (1 x N) Tensor[Float]. * -1: unknown, 0: not crowd, 1: is crowd - * RoiLabel.ImageInfo with shape (batchSize, 4), contains all images info + * RoiImageInfo.IMGINFO with shape (batchSize, 4), contains all images info * (height, width, original height, original width) */ -class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], - val isCrowd: IndexedSeq[Tensor[Float]], val imageInfo: Tensor[Float] = null) +class RoiMiniBatch(val input: Tensor[Float], val target: Array[RoiLabel], + val isCrowd: Array[Tensor[Float]], val imageInfo: Tensor[Float] = null) extends MiniBatch[Float] { override def size(): Int = input.size(1) @@ -216,20 +280,18 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], } override def getTarget(): Table = { - var i = 0 - val tables = (target, isCrowd).zipped.map { case (roiLabel, crowd) => - i += 1 + val tables = (target, isCrowd, 1 to isCrowd.length).zipped.map { case (roiLabel, crowd, i) => roiLabel.toTable - .update(RoiLabel.ISCROWD, crowd) - .update(RoiLabel.IMGINFO, imageInfo.select(1, i)) + .update(RoiImageInfo.ISCROWD, crowd) + .update(RoiImageInfo.IMGINFO, imageInfo.select(1, i)) } T.seq(tables) } override def slice(offset: Int, length: Int): MiniBatch[Float] = { val subInput = input.narrow(1, offset, length) - val subTarget = target.view(offset - 1, length) // offset starts from 1 - val subIsCrowd = isCrowd.view(offset - 1, length) // offset starts from 1 + val subTarget = target.slice(offset - 1, offset - 1 + length) // offset starts from 1 + val subIsCrowd = isCrowd.slice(offset - 1, offset - 1 + length) // offset starts from 1 val subSize =
imageInfo.narrow(1, offset, length) RoiMiniBatch(subInput, subTarget, subIsCrowd, subSize) } @@ -241,8 +303,8 @@ class RoiMiniBatch(val input: Tensor[Float], val target: IndexedSeq[RoiLabel], } object RoiMiniBatch { - def apply(data: Tensor[Float], target: IndexedSeq[RoiLabel], - isCrowd: IndexedSeq[Tensor[Float]], imageInfo: Tensor[Float] = null): + def apply(data: Tensor[Float], target: Array[RoiLabel], + isCrowd: Array[Tensor[Float]], imageInfo: Tensor[Float] = null): RoiMiniBatch = new RoiMiniBatch(data, target, isCrowd, imageInfo) } @@ -258,31 +320,40 @@ object RoiMiniBatch { * @param toRGB if converted to RGB, default format is BGR */ class RoiMTImageFeatureToBatch private[bigdl](width: Int, height: Int, - totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = true) + totalBatchSize: Int, transformer: FeatureTransformer, toRGB: Boolean = false) extends MTImageFeatureToBatch(totalBatchSize, transformer) { private val frameLength = height * width private val featureData: Array[Float] = new Array[Float](batchSize * frameLength * 3) private val labelData: Array[RoiLabel] = new Array[RoiLabel](batchSize) private val isCrowdData: Array[Tensor[Float]] = new Array[Tensor[Float]](batchSize) - private var featureTensor: Tensor[Float] = null + private val imgInfoData: Tensor[Float] = Tensor[Float](batchSize, 4) + private var featureTensor: Tensor[Float] = Tensor[Float]() override protected def processImageFeature(img: ImageFeature, position: Int): Unit = { img.copyTo(featureData, position * frameLength * 3, toRGB = toRGB) - val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]] + val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]] val label = img.getLabel.asInstanceOf[RoiLabel] require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" + "in ImageFeature's ISCROWD should be equal to the number of detections in the RoiLabel") isCrowdData(position) = isCrowd labelData(position) = label + 
imgInfoData.setValue(position + 1, 1, img.getHeight()) + imgInfoData.setValue(position + 1, 2, img.getWidth()) + imgInfoData.setValue(position + 1, 3, img.getOriginalHeight) + imgInfoData.setValue(position + 1, 4, img.getOriginalWidth) } - override protected def createBatch(batchSize: Int): MiniBatch[Float] = { - if (featureTensor == null) { - featureTensor = Tensor(Storage[Float](featureData), - storageOffset = 1, size = Array(batchSize, 3, height, width)) + override protected def createBatch(curBatchSize: Int): MiniBatch[Float] = { + if (featureTensor.nElement() != curBatchSize) { + featureTensor.set(Storage[Float](featureData), + storageOffset = 1, sizes = Array(curBatchSize, 3, height, width)) + } + def arraySlice[T](array: Array[T]) = { + if (array.length == curBatchSize) array else array.slice(0, curBatchSize) } - RoiMiniBatch(featureTensor, labelData.view, isCrowdData.view) + RoiMiniBatch(featureTensor, arraySlice(labelData), arraySlice(isCrowdData), + imgInfoData.narrow(1, 1, curBatchSize)) } } @@ -326,7 +397,7 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t imageBuffer(position).resize(3, img.getHeight(), img.getWidth()) // save img to buffer img.copyTo(imageBuffer(position).storage().array(), 0, toRGB = toRGB) - val isCrowd = img(RoiLabel.ISCROWD).asInstanceOf[Tensor[Float]] + val isCrowd = img(RoiImageInfo.ISCROWD).asInstanceOf[Tensor[Float]] val label = img.getLabel.asInstanceOf[RoiLabel] if (isCrowd != null && label != null) { require(label.bboxes.size(1) == isCrowd.size(1), "The number of detections" + @@ -345,11 +416,14 @@ class RoiImageFeatureToBatchWithResize private[bigdl](sizeDivisible: Int = -1, t if (featureTensor == null) featureTensor = Tensor() featureTensor.resize(batchSize, 3, height, wide).fill(0.0f) // copy img buffer to feature tensor - for (i <- 0 to (batchSize - 1)) { + for (i <- 0 until batchSize) { featureTensor.select(1, i + 1).narrow(2, 1, imageBuffer(i).size(2)) .narrow(3, 1, 
imageBuffer(i).size(3)).copy(imageBuffer(i)) } - RoiMiniBatch(featureTensor, labelData.view(0, batchSize), - isCrowdData.view(0, batchSize), imgInfoData.narrow(1, 1, batchSize)) + def arraySlice[T](array: Array[T]) = { + if (array.length == batchSize) array else array.slice(0, batchSize) + } + RoiMiniBatch(featureTensor, arraySlice(labelData), + arraySlice(isCrowdData), imgInfoData.narrow(1, 1, batchSize)) } } diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala index 9bfd21a9c51..cb038780450 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/label/roi/RoiLabel.scala @@ -16,17 +16,17 @@ package com.intel.analytics.bigdl.transform.vision.image.label.roi -import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks} +import com.intel.analytics.bigdl.dataset.segmentation.{MaskUtils, SegmentationMasks, RLEMasks} import com.intel.analytics.bigdl.tensor.Tensor +import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo import com.intel.analytics.bigdl.utils.{T, Table} -import com.intel.analytics.bigdl.dataset.segmentation.{RLEMasks, SegmentationMasks} /** * image target with classes and bounding boxes * * @param classes N (class labels) or 2 * N, the first row is class labels, * the second line is difficults - * @param bboxes N * 4 + * @param bboxes N * 4, (xmin, ymin, xmax, ymax) * @param masks the array of annotation masks of the targets */ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float], @@ -45,8 +45,8 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float], s"be equal to the number of mask array ${masks.length}") } } else if (classes.nElement() > 
0 && classes.dim() == 2) { - require(classes.size(2) == bboxes.size(1), s"the number of classes ${ classes.size(2) }" + - s"should be equal to the number of bounding box numbers ${ bboxes.size(1) }") + require(classes.size(2) == bboxes.size(1), s"the number of classes ${classes.size(2)}" + + s"should be equal to the number of bounding box numbers ${bboxes.size(1)}") if (masks != null) { require(classes.size(2) == masks.length, s"the number of classes ${classes.size(2)}" + s"should be equal to the number of bounding box numbers ${masks.length}") @@ -57,10 +57,11 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float], def toTable: Table = { val table = T() if (masks != null) { - table(RoiLabel.MASKS) = masks.map(_.toRLE) + require(masks.length > 0, "The masks can either be null or a non-empty array") + table(RoiImageInfo.MASKS) = masks.map(_.toRLE) } - table(RoiLabel.CLASSES) = classes - table(RoiLabel.BBOXES) = bboxes + table(RoiImageInfo.CLASSES) = classes + table(RoiImageInfo.BBOXES) = bboxes table } @@ -70,23 +71,6 @@ case class RoiLabel(classes: Tensor[Float], bboxes: Tensor[Float], } object RoiLabel { - val CLASSES = "classes" - val BBOXES = "bboxes" - val MASKS = "masks" - // ISCROWD and ORIGSIZE are stored in ImageFeature - val ISCROWD = "is_crowd" - val IMGINFO = "imgInfo" - val SCORES = "scores" - - - def getClasses(tab: Table): Tensor[Float] = tab[Tensor[Float]](CLASSES) - def getBBoxes(tab: Table): Tensor[Float] = tab[Tensor[Float]](BBOXES) - def getMasks(tab: Table): Array[RLEMasks] = tab[Array[RLEMasks]](MASKS) - def getIsCrowd(tab: Table): Tensor[Float] = tab[Tensor[Float]](ISCROWD) - def getScores(tab: Table): Tensor[Float] = tab[Tensor[Float]](SCORES) - def getImgInfo(tab: Table): Tensor[Float] = tab[Tensor[Float]](IMGINFO) - - def fromTensor(tensor: Tensor[Float]): RoiLabel = { val label = tensor.narrow(2, 1, 2).transpose(1, 2).contiguous() val rois = tensor.narrow(2, 3, 4) diff --git 
a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala index 4d4f1d59fd2..34bf3aa7cf4 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala +++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/models/maskrcnn/MaskRCNN.scala @@ -23,12 +23,12 @@ import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, Activity} import com.intel.analytics.bigdl.serialization.Bigdl.{AttrValue, BigDLModule} import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.transform.vision.image.util.BboxUtil import com.intel.analytics.bigdl.utils.serializer._ import com.intel.analytics.bigdl.utils.serializer.converters.DataConverter import com.intel.analytics.bigdl.utils.{T, Table} - import scala.reflect.ClassTag import scala.reflect.runtime._ @@ -243,10 +243,10 @@ class MaskRCNN(val inChannels: Int, } start += boxNumber - postOutput.update(RoiLabel.MASKS, masksRLE) - postOutput.update(RoiLabel.BBOXES, bboxPerImg) - postOutput.update(RoiLabel.CLASSES, classPerImg) - postOutput.update(RoiLabel.SCORES, scorePerImg) + postOutput.update(RoiImageInfo.MASKS, masksRLE) + postOutput.update(RoiImageInfo.BBOXES, bboxPerImg) + postOutput.update(RoiImageInfo.CLASSES, classPerImg) + postOutput.update(RoiImageInfo.SCORES, scorePerImg) } output(i + 1) = postOutput diff --git a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala index 286b05bcfba..f4d344c1e81 100644 --- a/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala 
+++ b/scala/dllib/src/main/scala/com/intel/analytics/bigdl/dllib/optim/ValidationMethod.scala @@ -23,6 +23,7 @@ import com.intel.analytics.bigdl.nn.AbsCriterion import com.intel.analytics.bigdl.nn.abstractnn.Activity import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric +import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.utils.Table import org.apache.commons.lang3.SerializationUtils @@ -313,10 +314,10 @@ object MAPUtil { val gtImage = new ArrayBuffer[GroundTruthRegion]() val roiLabel = gtTable[Table](i) if (roiLabel.length() > 0) { - val bbox = RoiLabel.getBBoxes(roiLabel) - val tclasses = RoiLabel.getClasses(roiLabel) - val isCrowd = RoiLabel.getIsCrowd(roiLabel) - val masks = if (isSegmentation) RoiLabel.getMasks(roiLabel) else null + val bbox = RoiImageInfo.getBBoxes(roiLabel) + val tclasses = RoiImageInfo.getClasses(roiLabel) + val isCrowd = RoiImageInfo.getIsCrowd(roiLabel) + val masks = if (isSegmentation) RoiImageInfo.getMasks(roiLabel) else null val bboxCnt = bbox.size(1) require(bboxCnt == tclasses.size(1), "CLASSES of target tables should have the" + "same size of the bbox counts") @@ -705,11 +706,11 @@ class MeanAveragePrecisionObjectDetection[T: ClassTag]( val imgOut = outTable[Table](imgId) // if the image contains empty predictions, do nothing if (imgOut.length() > 0) { - val bboxes = RoiLabel.getBBoxes(imgOut) - val scores = RoiLabel.getScores(imgOut) - val labels = RoiLabel.getClasses(imgOut) + val bboxes = RoiImageInfo.getBBoxes(imgOut) + val scores = RoiImageInfo.getScores(imgOut) + val labels = RoiImageInfo.getClasses(imgOut) require(bboxes.dim() == 2, "the bbox tensor should have 2 dimensions") - val masks = if (isSegmentation) Some(RoiLabel.getMasks(imgOut)) else None + val masks = if (isSegmentation) Some(RoiImageInfo.getMasks(imgOut)) else None val 
batchSize = bboxes.size(1) require(batchSize == labels.size(1), "CLASSES of target tables should have the" + "same size of the bbox counts") diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala index 71413128b7c..f05ae64decd 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/dataset/DataSetSpec.scala @@ -23,7 +23,7 @@ import com.intel.analytics.bigdl.dataset.image._ import com.intel.analytics.bigdl.dataset.segmentation.{COCODataset, COCOPoly, COCORLE, PolyMasks, RLEMasks} import com.intel.analytics.bigdl.models.utils.COCOSeqFileGenerator import com.intel.analytics.bigdl.tensor.Tensor -import com.intel.analytics.bigdl.transform.vision.image.ImageFeature +import com.intel.analytics.bigdl.transform.vision.image.{ImageFeature, RoiImageInfo} import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.utils.{Engine, RandomGenerator, SparkContextLifeCycle, TestUtils} import java.awt.image.DataBufferByte @@ -99,7 +99,7 @@ class DataSetSpec extends SparkContextLifeCycle with Matchers { .data(false) .map(imf => { (imf(ImageFeature.uri).asInstanceOf[String], imf.getOriginalSize, imf.getLabel[RoiLabel], - imf[Tensor[Float]](RoiLabel.ISCROWD), imf[Array[Byte]](ImageFeature.bytes)) + imf[Tensor[Float]](RoiImageInfo.ISCROWD), imf[Array[Byte]](ImageFeature.bytes)) }) .collect() .foreach({ case (uri, size, label, iscrowd, bytes) => diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala index 276962d6178..0a8f147eb73 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala +++ 
b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/models/maskrcnn/MaskRCNNSpec.scala @@ -20,6 +20,7 @@ import com.intel.analytics.bigdl.Module import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks import com.intel.analytics.bigdl.nn.Nms import com.intel.analytics.bigdl.tensor.Tensor +import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.utils.serializer.ModuleSerializationTest import com.intel.analytics.bigdl.utils.{RandomGenerator, T, Table} @@ -63,29 +64,29 @@ class MaskRCNNSpec extends FlatSpec with Matchers { val first = output[Table](1) val second = output[Table](2) - first.get[Tensor[Float]](RoiLabel.BBOXES) should be( - output1.get[Tensor[Float]](RoiLabel.BBOXES)) - first.get[Tensor[Float]](RoiLabel.CLASSES) should be( - output1.get[Tensor[Float]](RoiLabel.CLASSES)) - first.get[Tensor[Float]](RoiLabel.SCORES) should be( - output1.get[Tensor[Float]](RoiLabel.SCORES)) + first.get[Tensor[Float]](RoiImageInfo.BBOXES) should be( + output1.get[Tensor[Float]](RoiImageInfo.BBOXES)) + first.get[Tensor[Float]](RoiImageInfo.CLASSES) should be( + output1.get[Tensor[Float]](RoiImageInfo.CLASSES)) + first.get[Tensor[Float]](RoiImageInfo.SCORES) should be( + output1.get[Tensor[Float]](RoiImageInfo.SCORES)) - second.get[Tensor[Float]](RoiLabel.BBOXES) should be( - output2.get[Tensor[Float]](RoiLabel.BBOXES)) - second.get[Tensor[Float]](RoiLabel.CLASSES) should be( - output2.get[Tensor[Float]](RoiLabel.CLASSES)) - second.get[Tensor[Float]](RoiLabel.SCORES) should be( - output2.get[Tensor[Float]](RoiLabel.SCORES)) + second.get[Tensor[Float]](RoiImageInfo.BBOXES) should be( + output2.get[Tensor[Float]](RoiImageInfo.BBOXES)) + second.get[Tensor[Float]](RoiImageInfo.CLASSES) should be( + output2.get[Tensor[Float]](RoiImageInfo.CLASSES)) + second.get[Tensor[Float]](RoiImageInfo.SCORES) should be( + 
output2.get[Tensor[Float]](RoiImageInfo.SCORES)) // for masks - val firstMasks = first.get[Array[RLEMasks]](RoiLabel.MASKS).get - val expectedMasks = output1.get[Array[RLEMasks]](RoiLabel.MASKS).get + val firstMasks = first.get[Array[RLEMasks]](RoiImageInfo.MASKS).get + val expectedMasks = output1.get[Array[RLEMasks]](RoiImageInfo.MASKS).get for (i <- 0 to firstMasks.length - 1) { firstMasks(i).counts should be(expectedMasks(i).counts) } - val secondMasks = second.get[Array[RLEMasks]](RoiLabel.MASKS).get - val expectedMasks2 = output2.get[Array[RLEMasks]](RoiLabel.MASKS).get + val secondMasks = second.get[Array[RLEMasks]](RoiImageInfo.MASKS).get + val expectedMasks2 = output2.get[Array[RLEMasks]](RoiImageInfo.MASKS).get for (i <- 0 to secondMasks.length - 1) { secondMasks(i).counts should be(expectedMasks2(i).counts) diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala index 37423674a01..aac1b0cd141 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/optim/ValidationSpec.scala @@ -18,6 +18,7 @@ package com.intel.analytics.bigdl.optim import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric import com.intel.analytics.bigdl.tensor.{Storage, Tensor} +import com.intel.analytics.bigdl.transform.vision.image.RoiImageInfo import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel import com.intel.analytics.bigdl.utils.T import org.scalatest.{FlatSpec, Matchers} @@ -247,9 +248,9 @@ class ValidationSpec extends FlatSpec with Matchers { val target = T( T() - .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0, 1, 1, 1, 1, 1))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))) 
+ .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0, 1, 1, 1, 1, 1))) + .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(100, 100, 200, 200), T(300, 100, 400, 200), T(100, 300, 200, 400), @@ -271,8 +272,8 @@ class ValidationSpec extends FlatSpec with Matchers { val outputTable = T( T() - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 1, 1, 1, 1))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 1, 1, 1, 1))) + .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(110, 90, 210, 190), T(310, 110, 410, 210), T(320, 290, 420, 390), @@ -283,7 +284,7 @@ class ValidationSpec extends FlatSpec with Matchers { T(1210, 1310, 1290, 1410) )) ) - .update(RoiLabel.SCORES, Tensor[Float](T(1, 2, 4, 3, 1, 3, 4, 2))) + .update(RoiImageInfo.SCORES, Tensor[Float](T(1, 2, 4, 3, 1, 3, 4, 2))) ) val v2 = new MeanAveragePrecisionObjectDetection(3) val result2 = v2(outputTable, target) @@ -294,9 +295,9 @@ class ValidationSpec extends FlatSpec with Matchers { "MeanAveragePrecisionObjectDetection" should "be correct on empty detections" in { val target = T( T() - .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0))) - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0))) + .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0))) + .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(100, 100, 200, 200), T(300, 100, 400, 200), T(100, 300, 200, 400), @@ -314,9 +315,9 @@ class ValidationSpec extends FlatSpec with Matchers { "MeanAveragePrecisionObjectDetection" should "be correct on empty targets" in { val target = T( T() - .update(RoiLabel.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0))) - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.ISCROWD, Tensor[Float](T(0, 0, 0, 0, 0))) + .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0, 0))) 
+ .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(100, 100, 200, 200), T(300, 100, 400, 200), T(100, 300, 200, 400), @@ -329,25 +330,25 @@ class ValidationSpec extends FlatSpec with Matchers { ) val outputTable = T( T() - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0))) + .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(110, 90, 210, 190), T(310, 110, 410, 210), T(320, 290, 420, 390), T(210, 310, 290, 410) )) ) - .update(RoiLabel.SCORES, Tensor[Float](T(1, 2, 9, 7))), + .update(RoiImageInfo.SCORES, Tensor[Float](T(1, 2, 9, 7))), T() - .update(RoiLabel.CLASSES, Tensor[Float](T(0, 0, 0, 0))) - .update(RoiLabel.BBOXES, Tensor[Float](T( + .update(RoiImageInfo.CLASSES, Tensor[Float](T(0, 0, 0, 0))) + .update(RoiImageInfo.BBOXES, Tensor[Float](T( T(1110, 1090, 1210, 1190), T(1310, 1110, 1410, 1210), T(1320, 1290, 1420, 1390), T(1210, 1310, 1290, 1410) )) ) - .update(RoiLabel.SCORES, Tensor[Float](T(0, 5, 4, 8))) + .update(RoiImageInfo.SCORES, Tensor[Float](T(0, 5, 4, 8))) ) val v = new MeanAveragePrecisionObjectDetection[Float](3) val result = v(outputTable, target) diff --git a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala index 191d0df86cc..e8e5c77e232 100644 --- a/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala +++ b/scala/dllib/src/test/scala/com/intel/analytics/bigdl/transform/vision/image/MTImageFeatureToBatchSpec.scala @@ -17,6 +17,7 @@ package com.intel.analytics.bigdl.transform.vision.image import com.intel.analytics.bigdl.dataset.DataSet +import com.intel.analytics.bigdl.dataset.segmentation.RLEMasks import com.intel.analytics.bigdl.tensor.Tensor import com.intel.analytics.bigdl.transform.vision.image.label.roi.RoiLabel 
import com.intel.analytics.bigdl.utils.{Engine, T, Table} @@ -97,7 +98,7 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft Tensor(new Array[Float](2*4), Array(2, 4)), null ) - imf(RoiLabel.ISCROWD) = Tensor(Array(0f, 1f), Array(2)) + imf(RoiImageInfo.ISCROWD) = Tensor(Array(0f, 1f), Array(2)) imf(ImageFeature.originalSize) = (8, 16, 3) imf }).toArray @@ -179,18 +180,26 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft val in = input.select(1, i) in should be(expectedOutput) val t = target(i).asInstanceOf[Table] - t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2))) - // t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((8, 16, 3)) - t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4)) - t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2)) + t[Tensor[Float]](RoiImageInfo.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2))) + // t[(Int, Int, Int)](RoiImageInfo.ORIGSIZE) should be((8, 16, 3)) + t[Tensor[Float]](RoiImageInfo.BBOXES).size() should be (Array(2, 4)) + t[Tensor[Float]](RoiImageInfo.CLASSES).size() should be (Array(2)) } }) } - // todo: There is a race-condition bug in MTImageFeatureToBatch - /* + "RoiMiniBatch" should "serialize well" in { + def batch: RoiMiniBatch = RoiMiniBatch( + Tensor[Float](), + Array[RoiLabel](RoiLabel(Tensor[Float](), Tensor[Float]())), + Array[Tensor[Float]](Tensor[Float]()), + Tensor()) + val result = sc.parallelize(Array(batch, batch, batch, batch, batch), 3) + .coalesce(2, true) + .takeSample(false, 3).head + } + "MTImageFeatureToBatch classification" should "work well" in { - // val imgData = (0 to 1000).map(idx => (idx to (idx + 10*10*3)).map(_.toFloat).toArray) .map(arr => { val imf = ImageFeature() @@ -199,13 +208,11 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft imf(ImageFeature.label) = lab imf(ImageFeature.originalSize) = (10, 10, 3) imf - }) + }).toArray val transformer = 
MTImageFeatureToBatch(10, 10, 19, new FeatureTransformer {}, toRGB = false) - val miniBatch = transformer(imgData.toIterator) - // val imgCheck = new Array[Boolean](1000) + val miniBatch = transformer(DataSet.array(imgData).data(false)) + val imgCheck = new Array[Boolean](1001) miniBatch - .take(5) - // .take(1000 / 19) .foreach(batch => { (batch.size() <= 19) should be (true) val input = batch.getInput().asInstanceOf[Tensor[Float]] @@ -221,16 +228,16 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft B should be (G + 1) input.valueAt(i, 3, 10, 10) should be((idx.toFloat + 10 * 10 * 3 - 1) +- 0.000001f) target.valueAt(i) should be (idx.toFloat) - /* imgCheck(idx) should be (false) - imgCheck(idx) = true */ + imgCheck(idx) should be (false) + imgCheck(idx) = true } - }) + imgCheck.count(!_) should be (0) } "MTImageFeatureToBatch with ROI" should "work well" in { - // + val imgCheck = new Array[Boolean](1001) val imgData = (0 to 1000).map(idx => (idx to (idx + 10*10*3)).map(_.toFloat).toArray) .map(arr => { val imf = ImageFeature() @@ -238,34 +245,38 @@ class MTImageFeatureToBatchSpec extends FlatSpec with Matchers with BeforeAndAft imf(ImageFeature.label) = RoiLabel( Tensor(new Array[Float](2), Array(2)), Tensor(new Array[Float](2*4), Array(2, 4)), - Array(Tensor[Float](), Tensor[Float]()) + Array(new RLEMasks(Array(), 10, 10), + new RLEMasks(Array(), 10, 10) + ) ) - imf(RoiLabel.ISCROWD) = Tensor(Array(0f, 1f), Array(2)) + imf(RoiImageInfo.ISCROWD) = Tensor(Array(0f, 1f), Array(2)) imf(ImageFeature.originalSize) = (10, 10, 3) imf - }) + }).toArray val transformer = MTImageFeatureToBatch(10, 10, 19, new FeatureTransformer {}, toRGB = false, extractRoi = true) - val miniBatch = transformer(imgData.toIterator) - // val imgCheck = new Array[Boolean](1000) + val miniBatch = transformer(DataSet.array(imgData).data(false)) miniBatch - .take(5) - // .take(1000 / 19) .foreach(batch => { (batch.size() <= 19) should be (true) val target = 
batch.getTarget().asInstanceOf[Table] target.length() should be (batch.size()) for(i <- 1 to batch.size()) { val t = target(i).asInstanceOf[Table] - t[Tensor[Float]](RoiLabel.ISCROWD) should be (Tensor(Array(0f, 1f), Array(2))) - t[(Int, Int, Int)](RoiLabel.ORIGSIZE) should be((10, 10, 3)) - t[Tensor[Float]](RoiLabel.BBOXES).size() should be (Array(2, 4)) - t[Tensor[Float]](RoiLabel.CLASSES).size() should be (Array(2)) - t[Array[Tensor[Float]]](RoiLabel.MASKS).length should be (2) + RoiImageInfo.getIsCrowd(t) should be (Tensor(Array(0f, 1f), Array(2))) + RoiImageInfo.getImageInfo(t).size() should be(Array(4)) + RoiImageInfo.getBBoxes(t).size() should be (Array(2, 4)) + RoiImageInfo.getClasses(t).size() should be (Array(2)) + RoiImageInfo.getMasks(t).length should be (2) + val idx = batch.getInput().asInstanceOf[Table].apply[Tensor[Float]](1) + .valueAt(i, 1, 1, 1).toInt + imgCheck(idx) should be (false) + imgCheck(idx) = true } }) + imgCheck.count(!_) should be (0) - } */ + } }