uncomment TimeDistributed (#4736)
Le-Zheng committed Sep 15, 2021
1 parent c6a7b4c commit 1b48624
Showing 9 changed files with 2,742 additions and 2,677 deletions.
1,615 changes: 808 additions & 807 deletions python/dllib/src/bigdl/dllib/keras/layers/torch.py

Large diffs are not rendered by default.

57 changes: 28 additions & 29 deletions python/dllib/src/bigdl/dllib/keras/layers/wrappers.py
@@ -16,40 +16,39 @@

import sys

from ..engine.topology import ZooKerasLayer
from bigdl.dllib.keras.engine import ZooKerasLayer

if sys.version >= '3':
long = int
unicode = str

# uncomment when migrating zoo model
# class TimeDistributed(ZooKerasLayer):
# """
# TimeDistributed wrapper.
# Apply a layer to every temporal slice of an input.
# The input should be at least 3D.
# The dimension of index one will be considered as the temporal dimension.
#
# When you use this layer as the first layer of a model, you need to provide the argument
# input_shape (a shape tuple, does not include the batch dimension).
# name: String to specify the name of the wrapper. Default is None.
#
# # Arguments
# layer: A layer instance.
# input_shape: A shape tuple, not including batch.
# name: String to set the name of the wrapper.
# If not specified, its name will by default be a generated string.
#
# >>> from bigdl.dllib.keras.layers import Dense
# >>> timedistributed = TimeDistributed(Dense(8), input_shape=(10, 12))
# creating: createZooKerasDense
# creating: createZooKerasTimeDistributed
# """
# def __init__(self, layer, input_shape=None, **kwargs):
# super(TimeDistributed, self).__init__(None,
# layer,
# list(input_shape) if input_shape else None,
# **kwargs)
class TimeDistributed(ZooKerasLayer):
"""
TimeDistributed wrapper.
Apply a layer to every temporal slice of an input.
The input should be at least 3D.
The dimension of index one will be considered as the temporal dimension.
When you use this layer as the first layer of a model, you need to provide the argument
input_shape (a shape tuple, does not include the batch dimension).
name: String to specify the name of the wrapper. Default is None.
# Arguments
layer: A layer instance.
input_shape: A shape tuple, not including batch.
name: String to set the name of the wrapper.
If not specified, its name will by default be a generated string.
>>> from bigdl.dllib.keras.layers import Dense
>>> timedistributed = TimeDistributed(Dense(8), input_shape=(10, 12))
creating: createZooKerasDense
creating: createZooKerasTimeDistributed
"""
def __init__(self, layer, input_shape=None, **kwargs):
super(TimeDistributed, self).__init__(None,
layer,
list(input_shape) if input_shape else None,
**kwargs)
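
For context, a minimal usage sketch of the re-enabled Python wrapper (assuming the usual Sequential API from bigdl.dllib.keras.models; the expected output shape follows from the docstring above):

from bigdl.dllib.keras.layers import Dense, TimeDistributed
from bigdl.dllib.keras.models import Sequential

# Apply the same Dense(8) to each of the 10 temporal slices of a (10, 12) input.
model = Sequential()
model.add(TimeDistributed(Dense(8), input_shape=(10, 12)))
# Expected output shape: (None, 10, 8), i.e. (batch, time, features).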


class Bidirectional(ZooKerasLayer):
39 changes: 19 additions & 20 deletions python/dllib/src/test/bigdl/keras/test_layer.py
@@ -213,26 +213,25 @@ def test_regularizer(self):
loss='binary_crossentropy',
metrics=['acc'])

# uncomment when Math310 and TimeDistributed fixed
# def test_transformer_forward_backward(self):
# layer = ZLayer.TransformerLayer.init(
# vocab=200, hidden_size=128, n_head=4, seq_len=20)
#
# train_token = np.random.randint(20, size=(2, 20))
# train_pos = np.zeros((2, 20), dtype=np.int32)
# input = [train_token, train_pos]
# self.assert_forward_backward(layer, input)
#
# def test_bert_forward_backward(self):
# layer = ZLayer.BERT.init(
# vocab=200, hidden_size=128, n_head=4, seq_len=20, intermediate_size=20)
#
# train_token = np.random.randint(20, size=(2, 20))
# token_type_id = np.zeros((2, 20), dtype=np.int32)
# train_pos = np.zeros((2, 20), dtype=np.int32)
# mask_attention = np.ones((2, 1, 1, 20), dtype=np.int32)
# input = [train_token, token_type_id, train_pos, mask_attention]
# self.assert_forward_backward(layer, input)
def test_transformer_forward_backward(self):
layer = ZLayer.TransformerLayer.init(
vocab=200, hidden_size=128, n_head=4, seq_len=20)

train_token = np.random.randint(20, size=(2, 20))
train_pos = np.zeros((2, 20), dtype=np.int32)
input = [train_token, train_pos]
self.assert_forward_backward(layer, input)

def test_bert_forward_backward(self):
layer = ZLayer.BERT.init(
vocab=200, hidden_size=128, n_head=4, seq_len=20, intermediate_size=20)

train_token = np.random.randint(20, size=(2, 20))
token_type_id = np.zeros((2, 20), dtype=np.int32)
train_pos = np.zeros((2, 20), dtype=np.int32)
mask_attention = np.ones((2, 1, 1, 20), dtype=np.int32)
input = [train_token, token_type_id, train_pos, mask_attention]
self.assert_forward_backward(layer, input)


if __name__ == "__main__":
@@ -26,7 +26,7 @@ import com.intel.analytics.bigdl.dllib.{nn => bnn}
import com.intel.analytics.bigdl.dllib.keras.layers._
import com.intel.analytics.bigdl.dllib.keras.layers.internal._
import com.intel.analytics.bigdl.dllib.keras.models._
// import com.intel.analytics.bigdl.dllib.keras.layers.TimeDistributed
import com.intel.analytics.bigdl.dllib.keras.layers.TimeDistributed

import scala.reflect.ClassTag

@@ -303,11 +303,9 @@ object AutoGrad {
val mm = InternalMM[T](transA = transposeX,
transB = transposeY)
val kmm = new KerasLayerWrapper[T](mm.asInstanceOf[AbstractModule[Activity, Activity, T]])
// uncomment until TimeDistributed ready
// if (xShape.length > 3 || yShape.length > 3) {
// TimeDistributed(kmm.asInstanceOf[KerasLayer[Activity, Tensor[T], T]]).from(xx, yy)
// } else kmm.from(xx, yy)
kmm.from(xx, yy)
if (xShape.length > 3 || yShape.length > 3) {
TimeDistributed(kmm.asInstanceOf[KerasLayer[Activity, Tensor[T], T]]).from(xx, yy)
} else kmm.from(xx, yy)
}
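
To make the restored dispatch concrete: when either operand has more than 3 dimensions, the matrix product is applied to every temporal slice by wrapping kmm in TimeDistributed. A minimal numpy sketch of the equivalent computation (shapes are illustrative, not taken from the source):

import numpy as np

x = np.random.rand(2, 5, 3, 4)  # (batch, time, 3, 4)
y = np.random.rand(2, 5, 4, 6)  # (batch, time, 4, 6)

# Multiply slice t of x with slice t of y, then re-stack along the time axis.
out = np.stack([x[:, t] @ y[:, t] for t in range(x.shape[1])], axis=1)
assert out.shape == (2, 5, 3, 6)  # the time dimension is preserved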

/**
@@ -22,89 +22,78 @@ import com.intel.analytics.bigdl.dllib.keras.{KerasLayer}
import com.intel.analytics.bigdl.dllib.tensor.Tensor
import com.intel.analytics.bigdl.dllib.tensor.TensorNumericMath.TensorNumeric
import com.intel.analytics.bigdl.dllib.utils.Shape
// import com.intel.analytics.zoo.models.common.ZooModel
import com.intel.analytics.bigdl.dllib.keras.Net
import com.intel.analytics.bigdl.dllib.keras.layers.internal.InternalTimeDistributed
import com.intel.analytics.bigdl.dllib.keras.layers.utils.KerasUtils
import com.intel.analytics.bigdl.dllib.keras.models.KerasNet

import scala.reflect.ClassTag

// /**
// * TimeDistributed wrapper.
// * Apply a layer to every temporal slice of an input.
// * The input should be at least 3D, and the dimension of index one
// * will be considered to be the temporal dimension.
// *
// * When using this layer as the first layer in a model, you need to provide the argument
// * inputShape (a Single Shape, does not include the batch dimension).
// *
// * If you apply TimeDistributed to a Dense layer, you can use:
// * TimeDistributed(Dense(8), inputShape = Shape(10, 12))
// *
// * @param layer A layer instance.
// * @param inputShape A Single Shape, does not include the batch dimension.
// * @tparam T The numeric type of the parameters (e.g. weight, bias). Only float/double are supported now.
// */
// class TimeDistributed[T: ClassTag](
// val layer: KerasLayer[Activity, Tensor[T], T],
// val inputShape: Shape = null)(implicit ev: TensorNumeric[T])
// extends KerasLayer[Activity, Tensor[T], T](KerasUtils.addBatch(inputShape)) with Net {
//
// private var seqLen: Int = 0
//
// private def getInnerShape(inputShape: Shape): Shape = {
// val sizes = inputShape.toSingle().toArray
// require(sizes.length >= 3,
// s"TimeDistributed requires at least 3D input, but got input dim ${sizes.length}")
// if (seqLen != 0) {
// // in case time dim is singleton
// if (sizes(1) != 1) seqLen = sizes(1)
// } else seqLen = sizes(1)
// Shape(Array(sizes(0)) ++ sizes.drop(2))
// }
//
// private def getInnerOutputShape(shape: Shape): Shape = {
// val sizes = shape.toSingle().toArray
// Shape(Array(sizes(0), seqLen) ++ sizes.drop(1))
// }
//
// override def computeOutputShape(inputShape: Shape): Shape = {
// val innerShape = if (inputShape.isInstanceOf[SingleShape]) getInnerShape(inputShape)
// else {
// val shapes = inputShape.toMulti()
// Shape(shapes.map(getInnerShape(_)))
// }
//
// val innerOutputShape = layer.computeOutputShape(innerShape)
// getInnerOutputShape(innerOutputShape)
// }
//
// override def doBuild(inputShape: Shape): AbstractModule[Activity, Tensor[T], T] = {
// val innerShape = if (inputShape.isInstanceOf[SingleShape]) getInnerShape(inputShape)
// else Shape(inputShape.toMulti().map(getInnerShape(_)))
// layer.build(innerShape)
// layer.asInstanceOf[AbstractModule[Activity, Tensor[T], T]]
// val timedistributed = InternalTimeDistributed[T](layer)
// timedistributed.asInstanceOf[AbstractModule[Activity, Tensor[T], T]]
// }
// }
//
// object TimeDistributed {
// def apply[@specialized(Float, Double) T: ClassTag](
// layer: KerasLayer[Activity, Tensor[T], T],
// inputShape: Shape = null)(implicit ev: TensorNumeric[T]): TimeDistributed[T] = {
// new TimeDistributed[T](layer, inputShape)
// }
//
// def apply[@specialized(Float, Double) T: ClassTag](
// layer: ZooModel[Activity, Activity, T],
// inputShape: Shape)(implicit ev: TensorNumeric[T]): TimeDistributed[T] = {
// layer.model match {
// case keras: KerasNet[T] =>
// new TimeDistributed[T](keras.asInstanceOf[KerasLayer[Activity, Tensor[T], T]], inputShape)
// case _ => throw new Exception(s"$layer is not defined in Keras style")
// }
// }
// }
/**
* TimeDistributed wrapper.
* Apply a layer to every temporal slice of an input.
* The input should be at least 3D, and the dimension of index one
* will be considered to be the temporal dimension.
*
* When using this layer as the first layer in a model, you need to provide the argument
* inputShape (a Single Shape, does not include the batch dimension).
*
* If you apply TimeDistributed to a Dense layer, you can use:
* TimeDistributed(Dense(8), inputShape = Shape(10, 12))
*
* @param layer A layer instance.
* @param inputShape A Single Shape, does not include the batch dimension.
* @tparam T The numeric type of the parameters (e.g. weight, bias). Only float/double are supported now.
*/
class TimeDistributed[T: ClassTag](
val layer: KerasLayer[Activity, Tensor[T], T],
val inputShape: Shape = null)(implicit ev: TensorNumeric[T])
extends KerasLayer[Activity, Tensor[T], T](KerasUtils.addBatch(inputShape)) with Net {

private var seqLen: Int = 0

private def getInnerShape(inputShape: Shape): Shape = {
val sizes = inputShape.toSingle().toArray
require(sizes.length >= 3,
s"TimeDistributed requires at least 3D input, but got input dim ${sizes.length}")
if (seqLen != 0) {
// in case time dim is singleton
if (sizes(1) != 1) seqLen = sizes(1)
} else seqLen = sizes(1)
Shape(Array(sizes(0)) ++ sizes.drop(2))
}

private def getInnerOutputShape(shape: Shape): Shape = {
val sizes = shape.toSingle().toArray
Shape(Array(sizes(0), seqLen) ++ sizes.drop(1))
}

override def computeOutputShape(inputShape: Shape): Shape = {
val innerShape = if (inputShape.isInstanceOf[SingleShape]) getInnerShape(inputShape)
else {
val shapes = inputShape.toMulti()
Shape(shapes.map(getInnerShape(_)))
}

val innerOutputShape = layer.computeOutputShape(innerShape)
getInnerOutputShape(innerOutputShape)
}

override def doBuild(inputShape: Shape): AbstractModule[Activity, Tensor[T], T] = {
val innerShape = if (inputShape.isInstanceOf[SingleShape]) getInnerShape(inputShape)
else Shape(inputShape.toMulti().map(getInnerShape(_)))
layer.build(innerShape)
layer.asInstanceOf[AbstractModule[Activity, Tensor[T], T]]
val timedistributed = InternalTimeDistributed[T](layer)
timedistributed.asInstanceOf[AbstractModule[Activity, Tensor[T], T]]
}
}

object TimeDistributed {
def apply[@specialized(Float, Double) T: ClassTag](
layer: KerasLayer[Activity, Tensor[T], T],
inputShape: Shape = null)(implicit ev: TensorNumeric[T]): TimeDistributed[T] = {
new TimeDistributed[T](layer, inputShape)
}
}
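
The shape bookkeeping in getInnerShape/getInnerOutputShape can be traced with a small sketch. A hypothetical pure-Python rendering (function names and the None batch placeholder are illustrative, not part of the source):

def inner_shape(shape):
    # Drop the temporal dimension (index 1) before building the wrapped layer.
    assert len(shape) >= 3, "TimeDistributed requires at least 3D input"
    return (shape[0],) + shape[2:]

def outer_shape(inner_out, seq_len):
    # Re-insert the temporal dimension into the wrapped layer's output shape.
    return (inner_out[0], seq_len) + inner_out[1:]

# Dense(8) wrapped with inputShape = Shape(10, 12):
assert inner_shape((None, 10, 12)) == (None, 12)
assert outer_shape((None, 8), 10) == (None, 10, 8)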

@@ -845,13 +845,13 @@ class PythonZooKeras[T: ClassTag](implicit ev: TensorNumeric[T]) extends PythonZ
ThresholdedReLU(theta, toScalaShape(inputShape))
}

// uncomment when migrating zoo model
// def createZooKerasTimeDistributed(
// layer: KerasLayer[Activity, Tensor[T], T],
// inputShape: JList[Int] = null): TimeDistributed[T] = {
// TimeDistributed(layer, toScalaShape(inputShape))
// }
def createZooKerasTimeDistributed(
layer: KerasLayer[Activity, Tensor[T], T],
inputShape: JList[Int] = null): TimeDistributed[T] = {
TimeDistributed(layer, toScalaShape(inputShape))
}

// uncomment when migrating zoo model
// def createZooKerasTimeDistributed(
// layer: ZooModel[Activity, Activity, T],
// inputShape: JList[Int]): TimeDistributed[T] = {