diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Crop.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Crop.scala
index 5fbe3723cee..25217ba66e6 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Crop.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Crop.scala
@@ -69,7 +69,9 @@ object Crop {
       y2 = Math.max(Math.min(y2, height), 0f)
     }
     val rect = new Rect(x1.toInt, y1.toInt, (x2 - x1).toInt, (y2 - y1).toInt)
-    input.submat(rect).copyTo(output)
+    val submat = input.submat(rect)
+    submat.copyTo(output)
+    submat.release()
   }
 }
 
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Expand.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Expand.scala
index 8c805a12cc0..4b2cfb98526 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Expand.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Expand.scala
@@ -65,7 +65,9 @@ class Expand(meansR: Int = 123, meansG: Int = 117, meansB: Int = 104,
     channels.get(1).setTo(new Scalar(meansG))
     channels.get(2).setTo(new Scalar(meansR))
     Core.merge(channels, output)
-    input.copyTo(output.submat(bboxRoi))
+    val submat = output.submat(bboxRoi)
+    input.copyTo(submat)
+    submat.release()
     // release memory
     (0 to 2).foreach(channels.get(_).release())
     expandBbox
@@ -112,7 +114,9 @@ class FixExpand(expandHeight: Int, expandWidth: Int) extends FeatureTransformer
     val leftPad = ((expandWidth - input.width()) / 2).floor
     val bboxRoi = new Rect(leftPad.toInt, topPad.toInt, width, height)
     output.create(expandHeight, expandWidth, input.`type`())
-    input.copyTo(output.submat(bboxRoi))
+    val submat = output.submat(bboxRoi)
+    input.copyTo(submat)
+    submat.release()
     output.copyTo(input)
     feature(ImageFeature.boundingBox) = BoundingBox(leftPad, topPad,
       leftPad + width, topPad + height)
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Filler.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Filler.scala
index bb0b727059e..b6e6813e30e 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Filler.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/feature/transform/vision/image/augmentation/Filler.scala
@@ -41,6 +41,7 @@ class Filler(startX: Float, startY: Float, endX: Float, endY: Float, value: Int
 
   override def transformMat(feature: ImageFeature): Unit = {
     var fillMat: Mat = null
+    var submat: Mat = null
     try {
       val mat = feature.opencvMat()
       val x1 = (startX * mat.cols()).ceil.toInt
@@ -48,9 +49,11 @@ class Filler(startX: Float, startY: Float, endX: Float, endY: Float, value: Int
       val y1 = (startY * mat.rows()).ceil.toInt
       val y2 = (endY * mat.rows()).ceil.toInt
       fillMat = new core.Mat(y2 - y1, x2 - x1, mat.`type`(), new core.Scalar(value, value, value))
-      fillMat.copyTo(mat.submat(y1, y2, x1, x2))
+      submat = mat.submat(y1, y2, x1, x2)
+      fillMat.copyTo(submat)
     } finally {
       if (null != fillMat) fillMat.release()
+      if (null != submat) submat.release()
     }
   }
 }
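Context for the three changes above: in OpenCV's Java binding, Mat.submat() allocates a fresh native Mat header over the parent's pixel data, and that header is only freed by an explicit release(); relying on finalization leaks native memory under load. The patch therefore binds every submat to a local and releases it after the copy. A minimal sketch of the same idea wrapped as a loan helper; withSubmat is a hypothetical name, not a BigDL or OpenCV API:

import org.opencv.core.{Mat, Rect}

object SubmatLoan {
  // Lend a submat view to `f`, guaranteeing the native header is released
  // even if `f` throws. The pixel data stays owned by `parent`.
  def withSubmat[T](parent: Mat, roi: Rect)(f: Mat => T): T = {
    val sub = parent.submat(roi)
    try f(sub) finally sub.release()
  }
}

// e.g. the Crop case would read: SubmatLoan.withSubmat(input, rect)(_.copyTo(output))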
diff --git a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/nn/mkldnn/SpatialBatchNormalization.scala b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/nn/mkldnn/SpatialBatchNormalization.scala
index 06580eb7445..0a4b0af2391 100644
--- a/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/nn/mkldnn/SpatialBatchNormalization.scala
+++ b/spark/dl/src/main/scala/com/intel/analytics/bigdl/dllib/nn/mkldnn/SpatialBatchNormalization.scala
@@ -47,6 +47,61 @@ class SpatialBatchNormalization(
   // same to scaled runningMean/runningVariance in dnn.
   private[bigdl] var needScale = false
 
+  class SwitchablePrimitives() {
+    private var _forwardDesc: Long = 0
+    private var _updateOutputMemoryPrimitives: Array[Long] = _
+    private var _updateOutputPrimitives: Array[Long] = _
+    private var _fwdPrimDesc: Long = 0
+    private var _inputFormat: NativeData = _
+    private var _outputFormat: NativeData = _
+
+    def switchInOutFormats(): Unit = {
+      if (_inputFormat == null) {
+        _inputFormat = MemoryData.operationWant(fwdPrimDesc, Query.SrcPd)
+      }
+      if (_outputFormat == null) {
+        _outputFormat = MemoryData.operationWant(fwdPrimDesc, Query.DstPd)
+      }
+      _inputFormats(0) = _inputFormat
+      _outputFormats(0) = _outputFormat
+    }
+
+    def fwdPrimDesc: Long = {
+      if (_fwdPrimDesc == 0) {
+        _fwdPrimDesc = if (relu) {
+          val postOps = MklDnnMemory.CreatePostOps()
+          MklDnn.PostOpsAppendEltwise(postOps, 1.0f, AlgKind.EltwiseRelu, 0.0f, 0.0f)
+          val attr = MklDnnMemory.CreateAttr()
+          MklDnn.AttrSetPostOps(attr, postOps)
+          MklDnnMemory.PrimitiveDescCreateV2(_forwardDesc, attr, runtime.engine, 0)
+        } else {
+          MklDnnMemory.PrimitiveDescCreate(_forwardDesc, runtime.engine, 0)
+        }
+      }
+      _fwdPrimDesc
+    }
+
+    def forwardDesc(gen: () => Long): Long = {
+      if (_forwardDesc == 0) {
+        _forwardDesc = gen()
+      }
+      _forwardDesc
+    }
+
+    def switchUpdateOutputMemoryPrimitives(gen: () => (Array[Long], Array[Long])): Unit = {
+      if (_updateOutputMemoryPrimitives == null) {
+        val generated = gen()
+        _updateOutputMemoryPrimitives = generated._1
+        _updateOutputPrimitives = generated._2
+      }
+      updateOutputMemoryPrimitives = _updateOutputMemoryPrimitives
+      updateOutputPrimitives = _updateOutputPrimitives
+    }
+  }
+
+  @transient private lazy val trainingPrimitives = new SwitchablePrimitives
+  @transient private lazy val inferencePrimitives = new SwitchablePrimitives
+
   @transient private var updateOutputTensors: Array[Tensor[Float]] = _
   @transient private var updateOutputMemoryPrimitives: Array[Long] = _
   @transient private var updateGradInputTensors: Array[Tensor[Float]] = _
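The SwitchablePrimitives helper above guards each expensive native object behind a null/zero check, so a module that flips between training and inference reuses the objects created on the first pass instead of recreating (and leaking) them. Stripped to its core, the pattern is plain lazy memoization of a native handle; NativeHandleCache below is an illustrative name, not BigDL API:

// Memoize a native handle (modelled as a raw Long, as in MKL-DNN's JNI layer).
final class NativeHandleCache {
  private var handle: Long = 0L // 0 means "not created yet"

  def getOrCreate(create: () => Long): Long = {
    if (handle == 0L) handle = create() // first call pays the creation cost
    handle // later calls reuse the same native object
  }
}

Keeping one such cache per phase (trainingPrimitives and inferencePrimitives) is what lets initFwdPrimitives below become idempotent per phase.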
@@ -142,62 +197,57 @@ class SpatialBatchNormalization(
     // the bn only accept F32 as input, like lrn
     val src = NativeData(inputs.head.shape, inputs.head.layout, DataType.F32)
 
+    // init once
+    if (_inputFormats == null) {
+      _inputFormats = new Array[MemoryData](1)
+      require(_outputFormats == null)
+      _outputFormats = new Array[MemoryData](1)
+    }
+
     // init phase status
     initPhase(phase)
-    forwardDesc = modelPhase match {
+
+    modelPhase match {
       case TrainingPhase =>
-        MklDnnMemory.BatchNormForwardDescInit(PropKind.Forward,
-          src.getMemoryDescription(), eps.toFloat, MklDnn.BatchNormFlag.mkldnn_use_scaleshift)
+        forwardDesc = trainingPrimitives.forwardDesc(() => MklDnnMemory.BatchNormForwardDescInit(
+          PropKind.Forward,
+          src.getMemoryDescription(), eps.toFloat, MklDnn.BatchNormFlag.mkldnn_use_scaleshift))
+        val fwdPrimDesc = trainingPrimitives.fwdPrimDesc
+        trainingPrimitives.switchInOutFormats()
+        trainingPrimitives.switchUpdateOutputMemoryPrimitives(() => {
+          val srcs = Array(inputFormats()(0), weightAndBias).map(_.getPrimitive(runtime))
+          val dsts = Array(outputFormats()(0), mean, variance).map(_.getPrimitive(runtime))
+          val indexes = Array.fill(srcs.length)(0)
+          val primitive = MklDnnMemory.PrimitiveCreate2(fwdPrimDesc, srcs, indexes,
+            srcs.length, dsts, dsts.length)
+          val _updateOutputMemoryPrimitives = srcs ++ dsts
+          val _updateOutputPrimitives = Array(primitive)
+          (_updateOutputMemoryPrimitives, _updateOutputPrimitives)
+        })
       case InferencePhase =>
         // we always use the weight and bias / scale and offset. So the flags should be combined
         // with use_scaleshift and use_global_stats.
-        MklDnnMemory.BatchNormForwardDescInit(PropKind.ForwardInference,
-          src.getMemoryDescription(), eps.toFloat,
-          MklDnn.BatchNormFlag.mkldnn_use_global_stats | MklDnn.BatchNormFlag.mkldnn_use_scaleshift)
+        forwardDesc = inferencePrimitives.forwardDesc(() =>
+          MklDnnMemory.BatchNormForwardDescInit(PropKind.ForwardInference,
+            src.getMemoryDescription(), eps.toFloat, MklDnn.BatchNormFlag.mkldnn_use_global_stats
+              | MklDnn.BatchNormFlag.mkldnn_use_scaleshift))
+        val fwdPrimDesc = inferencePrimitives.fwdPrimDesc
+        inferencePrimitives.switchInOutFormats()
+        inferencePrimitives.switchUpdateOutputMemoryPrimitives(() => {
+          val srcs = Array(inputFormats()(0), mean, variance, weightAndBias).map(_.getPrimitive
+            (runtime))
+          val dsts = Array(outputFormats()(0).getPrimitive(runtime))
+          val indexes = Array.fill(srcs.length)(0)
+          val primitive = MklDnnMemory.PrimitiveCreate2(fwdPrimDesc, srcs, indexes,
+            srcs.length, dsts, dsts.length)
+          val _updateOutputMemoryPrimitives = srcs ++ dsts
+          val _updateOutputPrimitives = Array(primitive)
+          (_updateOutputMemoryPrimitives, _updateOutputPrimitives)
+        })
       case _ => throw new UnsupportedOperationException
     }
 
-    val primDesc = if (relu) {
-      val postOps = MklDnnMemory.CreatePostOps()
-      MklDnn.PostOpsAppendEltwise(postOps, 1.0f, AlgKind.EltwiseRelu, 0.0f, 0.0f)
-      val attr = MklDnnMemory.CreateAttr()
-      MklDnn.AttrSetPostOps(attr, postOps)
-      MklDnnMemory.PrimitiveDescCreateV2(forwardDesc, attr, runtime.engine, 0)
-      // TODO we should destroy these ops
-    } else {
-      MklDnnMemory.PrimitiveDescCreate(forwardDesc, runtime.engine, 0)
-    }
-
-    if (_inputFormats == null) {
-      _inputFormats = new Array[MemoryData](1)
-    }
-
-    if (_outputFormats == null) {
-      _outputFormats = new Array[MemoryData](1)
-    }
-
-    _inputFormats(0) = MemoryData.operationWant(primDesc, Query.SrcPd)
-    _outputFormats(0) = MemoryData.operationWant(primDesc, Query.DstPd)
-
-    val (srcs, dsts) = if (modelPhase == TrainingPhase) {
-      val srcs = Array(inputFormats()(0), weightAndBias).map(_.getPrimitive(runtime))
-      val dsts = Array(outputFormats()(0), mean, variance).map(_.getPrimitive(runtime))
-      (srcs, dsts)
-    } else {
-      val srcs = Array(inputFormats()(0), mean, variance, weightAndBias).map { x =>
-        x.getPrimitive(runtime)
-      }
-      val dsts = Array(outputFormats()(0).getPrimitive(runtime))
-      (srcs, dsts)
-    }
-    val indexes = Array.fill(srcs.length)(0)
-
-    val primitive = MklDnnMemory.PrimitiveCreate2(primDesc, srcs, indexes,
-      srcs.length, dsts, dsts.length)
-
-    updateOutputMemoryPrimitives = srcs ++ dsts
-    updateOutputPrimitives = Array(primitive)
-
+    // init once
     // if the output is not null, it means we have initialized the primitives before.
     // so we do not need create weightAndBias native space again.
     if (output == null || output.isInstanceOf[DnnTensor[_]] &&
@@ -209,6 +259,7 @@ class SpatialBatchNormalization(
       updateOutputTensors = null
     }
 
+    // init once
    if (this.weightAndBias.native == null) {
      if (modelPhase == InferencePhase) {
        this.runningMean.setMemoryData(
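Taken together, these hunks make initFwdPrimitives idempotent per phase: the format arrays are allocated once, each phase owns its own cached descriptor and primitives via SwitchablePrimitives, and a phase switch merely repoints updateOutputMemoryPrimitives/updateOutputPrimitives at the cached arrays. A condensed sketch of that control flow, with illustrative names (Phase, PhaseState, Module are not BigDL API):

object PhaseSwitchSketch {
  sealed trait Phase
  case object TrainingPhase extends Phase
  case object InferencePhase extends Phase

  // One state bundle per phase, built at most once (cf. SwitchablePrimitives).
  final class PhaseState(build: () => Array[Long]) {
    private var primitives: Array[Long] = _
    def get: Array[Long] = {
      if (primitives == null) primitives = build()
      primitives
    }
  }

  final class Module(buildTrain: () => Array[Long], buildInfer: () => Array[Long]) {
    private val training = new PhaseState(buildTrain)
    private val inference = new PhaseState(buildInfer)
    // The working set the forward pass actually executes.
    var updateOutputPrimitives: Array[Long] = _

    def initFwdPrimitives(phase: Phase): Unit = {
      updateOutputPrimitives = phase match {
        case TrainingPhase => training.get
        case InferencePhase => inference.get
      }
    }
  }
}

With this shape, a train -> inference -> train cycle calls each build function exactly once; before the patch, every switch re-ran the full creation path and the old native primitives were never destroyed (note the removed `// TODO we should destroy these ops`).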