Bert inference perf improve (intel-analytics#1555)
Split the big Dense layer into several small Dense layers in TransformerLayer.
Ensure BERT uses MKL for math operations.
dding3 committed Aug 10, 2019
1 parent f807dac commit 0e4a243
Showing 4 changed files with 23 additions and 17 deletions.
Dense.scala

@@ -77,5 +77,3 @@ object Dense {
       wRegularizer, bRegularizer, bias, inputShape)
   }
 }
-
-
InternalLayerNorm.scala

@@ -16,14 +16,15 @@

 package com.intel.analytics.zoo.pipeline.api.keras.layers.internal
 
-import com.intel.analytics.bigdl.nn.{Mean, Sum}
-import com.intel.analytics.bigdl.nn.abstractnn.{AbstractModule, TensorModule}
+import com.intel.analytics.bigdl.nn.abstractnn.TensorModule
 import com.intel.analytics.bigdl.tensor.Tensor
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
+import com.intel.analytics.zoo.common.TensorOperation
 
 import scala.reflect.ClassTag
 
-private[zoo] class InternalLayerNorm[T: ClassTag](val nOutput: Int = 768, val eps: Double = 1e-5)
+private[zoo] class InternalLayerNorm[T: ClassTag](
+  val nOutput: Int = 768, val eps: Double = 1e-5)
   (implicit ev: TensorNumeric[T]) extends TensorModule[T]{
   val weight = Tensor.ones[T](nOutput).view(1, nOutput)
   val bias = Tensor[T](nOutput).view(1, nOutput)
@@ -39,12 +40,13 @@ private[zoo] class InternalLayerNorm[T: ClassTag](val nOutput: Int = 768, val ep
   override def updateOutput(input: Tensor[T]): Tensor[T] = {
     val dim = input.dim()
     val u = input.sum(dim).div(ev.fromType(input.size(dim)))
-    divInput1 = input.clone().sub(u) // x - u
+
+    divInput1 = TensorOperation.subTensor(input.clone(), u)
     val square = divInput1.clone().square()
     val s = square.sum(square.dim()).div(ev.fromType(square.size(square.dim())))
     sqrtInput = s.add(ev.fromType(eps))
     divInput2 = sqrtInput.clone().sqrt()
-    y = divInput1.clone.div(divInput2)
+    y = TensorOperation.divTensor(divInput1.clone(), divInput2)
     output = y.clone().cmul(weight).add(bias)
     output
   }
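For reference, the arithmetic in updateOutput above is standard layer normalization over the last dimension: y = (x - mean) / sqrt(var + eps) * weight + bias. The patch replaces the broadcasted sub and div with TensorOperation.subTensor and TensorOperation.divTensor, presumably the MKL-backed paths the commit message refers to. A minimal sketch of the same math on a plain Float array (hypothetical helper, not part of the patch):

// A layer-norm reference on a flat array, assuming the last (only)
// dimension is the one being normalized; mirrors updateOutput above.
object LayerNormSketch {
  def layerNorm(x: Array[Float], weight: Array[Float], bias: Array[Float],
                eps: Float = 1e-5f): Array[Float] = {
    val n = x.length
    val u = x.sum / n                                 // u = mean(x)
    val centered = x.map(_ - u)                       // divInput1 = x - u
    val s = centered.map(c => c * c).sum / n          // biased variance
    val denom = math.sqrt(s + eps).toFloat            // divInput2 = sqrt(s + eps)
    centered.zipWithIndex.map { case (c, i) => (c / denom) * weight(i) + bias(i) }
  }

  def main(args: Array[String]): Unit = {
    val out = layerNorm(Array(1f, 2f, 3f, 4f), Array.fill(4)(1f), Array.fill(4)(0f))
    println(out.mkString(", "))  // ~ -1.342, -0.447, 0.447, 1.342
  }
}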
InternalSoftMax.scala

@@ -17,7 +17,7 @@
 package com.intel.analytics.zoo.pipeline.api.keras.layers.internal
 
 import com.intel.analytics.bigdl.nn.abstractnn.TensorModule
-import com.intel.analytics.bigdl.tensor.Tensor
+import com.intel.analytics.bigdl.tensor.{DoubleType, FloatType, Tensor}
 import com.intel.analytics.bigdl.tensor.TensorNumericMath.TensorNumeric
 import com.intel.analytics.bigdl.utils.Shape

@@ -30,18 +30,20 @@ import scala.reflect.ClassTag
  * where shift = max_i(x_i).
  * Currently only support apply softmax normalization to the last dim.
  */
-private[zoo] class InternalSoftMax[T: ClassTag]()(implicit ev: TensorNumeric[T])
-  extends TensorModule[T] {
+private[zoo] class InternalSoftMax[T: ClassTag]()
+  (implicit ev: TensorNumeric[T]) extends TensorModule[T] {
 
   override def updateOutput(input: Tensor[T]): Tensor[T] = {
     val dim = input.dim()
     val sizes = input.size()
     val shift = input.max(dim)._1
-    val shiftedInput = input.sub(shift.expand(sizes))
+
+    val shiftedInput = input.clone().sub(shift.expand(sizes).contiguous())
     val exp = shiftedInput.exp()
+
     val sum = exp.sum(dim)
-    output = exp.div(sum.expand(sizes))
+    output = exp.div(sum.expand(sizes).contiguous())
+
     output
   }

@@ -54,7 +56,7 @@ private[zoo] class InternalSoftMax[T: ClassTag]()(implicit ev: TensorNumeric[T])
 }
 
 private[zoo] object InternalSoftMax{
-  def apply[@specialized(Float, Double) T: ClassTag]()
+  def apply[T: ClassTag]()
     (implicit ev: TensorNumeric[T]) : InternalSoftMax[T] = {
     new InternalSoftMax[T]()
   }
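Two things are going on in these hunks. The shift by max_i(x_i) from the scaladoc is the standard numerical-stability trick: exp(x - max) stays in range, and the result is mathematically identical to exp(x) / sum(exp(x)). The added .contiguous() calls matter because expand produces a strided view rather than a copy, so materializing it gives the element-wise kernels (and, per the commit message, presumably MKL) dense memory to operate on. A stand-alone sketch of the stable-softmax math (illustration only, not this module's API):

// Stable softmax over one row: shifting by the max changes nothing
// mathematically but keeps exp() in range.
object SoftMaxSketch {
  def softmax(x: Array[Double]): Array[Double] = {
    val shift = x.max                           // shift = max_i(x_i)
    val exps = x.map(v => math.exp(v - shift))  // exp(x - shift), never overflows
    val sum = exps.sum
    exps.map(_ / sum)
  }

  def main(args: Array[String]): Unit = {
    // Without the shift, exp(1002.0) alone would overflow to Infinity.
    println(softmax(Array(1000.0, 1001.0, 1002.0)).mkString(", "))
  }
}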
TransformerLayer.scala

@@ -130,10 +130,14 @@ private[layers] class TransformerLayer[T: ClassTag](

   def multiHeadSelfAttention(x: Variable[T], hiddenSize: Int,
     attention_mask: Variable[T] = null): Variable[T] = {
-    val c = projectionLayer(hiddenSize * 3).from(x)
-    val query = c.slice(2, 0, hiddenSize)
-    val key = c.slice(2, hiddenSize, hiddenSize)
-    val value = c.slice(2, hiddenSize * 2, hiddenSize)
+    // val c = projectionLayer(hiddenSize * 3).from(x)
+    // val query = c.slice(2, 0, hiddenSize)
+    // val key = c.slice(2, hiddenSize, hiddenSize)
+    // val value = c.slice(2, hiddenSize * 2, hiddenSize)
+    val query = projectionLayer(hiddenSize).from(x)
+    val key = projectionLayer(hiddenSize).from(x)
+    val value = projectionLayer(hiddenSize).from(x)
+
     val q = splitHeads(query, nHead)
     val k = splitHeads(key, nHead, k = true)
     val v = splitHeads(value, nHead)
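The fused hiddenSize x 3*hiddenSize projection plus three slice ops is replaced by three independent hiddenSize x hiddenSize projections for query, key, and value. Both forms compute the same result whenever the three small weight matrices are the column blocks of the fused weight, and the split form drops the slicing in favor of three plain GEMMs, which presumably map more directly onto MKL. A toy sketch of that equivalence using nested-array matrices (hypothetical helpers, for illustration only):

// Toy equivalence check: one fused QKV projection vs. three separate
// projections whose weights are the column blocks of the fused weight.
object QkvSplitSketch {
  type Mat = Array[Array[Double]]

  def matmul(a: Mat, b: Mat): Mat =
    a.map(row => b.transpose.map(col => row.zip(col).map { case (x, w) => x * w }.sum))

  // Take the 2-column block starting at column i (toy hiddenSize = 2).
  def block(m: Mat, i: Int): Mat = m.map(_.slice(i, i + 2))

  def main(args: Array[String]): Unit = {
    val x: Mat = Array(Array(1.0, 2.0))  // one token, hiddenSize = 2
    // Fused weight, hiddenSize x 3*hiddenSize, columns [Wq | Wk | Wv].
    val wFused: Mat =
      Array(Array(1.0, 0.0, 0.0, 1.0, 1.0, 1.0), Array(0.0, 1.0, 1.0, 0.0, 2.0, 2.0))
    val c = matmul(x, wFused)            // fused projection, then slice

    // Each standalone projection matches the corresponding slice of c.
    println(matmul(x, block(wFused, 0)).head.sameElements(block(c, 0).head)) // query: true
    println(matmul(x, block(wFused, 2)).head.sameElements(block(c, 2).head)) // key:   true
    println(matmul(x, block(wFused, 4)).head.sameElements(block(c, 4).head)) // value: true
  }
}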
