[Relax][Op] Introducing more high-level operators (apache#13)
MasterJH5574 committed Nov 19, 2022
1 parent adb2961 commit a9c29e6
Showing 18 changed files with 1,241 additions and 4 deletions.
61 changes: 61 additions & 0 deletions include/tvm/relax/op_attr_types.h
@@ -342,6 +342,67 @@ struct ConcatenateAttrs : public tvm::AttrsNode<ConcatenateAttrs> {
}
}; // struct ConcatenateAttrs

/*! \brief Attributes used in dropout operator */
struct DropoutAttrs : public tvm::AttrsNode<DropoutAttrs> {
double rate;

TVM_DECLARE_ATTRS(DropoutAttrs, "relax.attrs.DropoutAttrs") {
TVM_ATTR_FIELD(rate)
.describe("Fraction of the input that gets dropped out during training time")
.set_default(0.5);
}
}; // struct DropoutAttrs

/*! \brief Attributes used in layer_norm operator */
struct LayerNormAttrs : public tvm::AttrsNode<LayerNormAttrs> {
Array<Integer> axis;
double epsilon;
bool center;
bool scale;

TVM_DECLARE_ATTRS(LayerNormAttrs, "relax.attrs.LayerNormAttrs") {
TVM_ATTR_FIELD(axis)
.describe("The axes along which the normalization is applied")
.set_default(Array<Integer>{Integer(-1)});
TVM_ATTR_FIELD(epsilon)
.describe("Small float added to variance to avoid dividing by zero")
.set_default(1e-5);
TVM_ATTR_FIELD(center)
.describe("If True, add offset of beta to normalized tensor. If False, beta is ignored")
.set_default(true);
TVM_ATTR_FIELD(scale)
.describe(
"If True, multiply by gamma. If False, gamma is not used. "
"When the next layer is piecewise linear (also, e.g., nn.relu), "
"this can be disabled since the scaling will be done by the next layer.")
.set_default(true);
}
}; // struct LayerNormAttrs

/*! \brief Attributes for reduction operators */
struct ReduceAttrs : public tvm::AttrsNode<ReduceAttrs> {
Optional<Array<Integer>> axis;
bool keepdims;

TVM_DECLARE_ATTRS(ReduceAttrs, "relax.attrs.ReduceAttrs") {
TVM_ATTR_FIELD(axis)
.set_default(Optional<Array<Integer>>{NullOpt})
.describe(R"code(The axis or axes along which to perform the reduction.
The default, `axis=()`, will compute over all elements into a
scalar array with shape `(1,)`.
If `axis` is int, a reduction is performed on a particular axis.
If `axis` is a tuple of ints, a reduction is performed on all the axes
specified in the tuple.
If `exclude` is true, reduction will be performed on the axes that are
NOT in axis instead.)code");
TVM_ATTR_FIELD(keepdims).set_default(false).describe(
"If this is set to `True`, the reduced axes are left "
"in the result as dimension with size one.");
}
}; // struct ReduceAttrs

} // namespace relax
} // namespace tvm
#endif // TVM_RELAX_OP_ATTR_TYPES_H_
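
To make the `axis` and `keepdims` semantics described in `ReduceAttrs` concrete, here is a small NumPy analogy; NumPy is used purely for illustration and is not implied by this diff.

import numpy as np

x = np.arange(6, dtype="float32").reshape(2, 3)

np.sum(x)                               # axis unset: reduce over all elements -> scalar
np.sum(x, axis=1)                       # reduce along one particular axis -> shape (2,)
np.sum(x, axis=(0, 1), keepdims=True)   # keep reduced axes as size-1 dims -> shape (1, 1)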
1 change: 1 addition & 0 deletions python/tvm/relax/op/__init__.py
@@ -21,6 +21,7 @@
from .base import *
from .nn import *
from .op_attrs import *
from .reduce import *
from .tensor import *
from .transform import *
from . import builtin
127 changes: 127 additions & 0 deletions python/tvm/relax/op/nn/nn.py
@@ -15,6 +15,8 @@
# specific language governing permissions and limitations
# under the License.
"""Relax Neural Network (NN) operators"""
from typing import List, Union

from tvm.relay.op.nn.utils import get_pad_tuple2d
from ...expr import Expr
from . import _ffi_api
@@ -180,6 +182,44 @@ def relu(data: Expr) -> Expr:
return _ffi_api.relu(data)


def gelu(data: Expr) -> Expr:
r"""Gaussian Error Linear Units function

.. math::

\text{GELU}(x) = 0.5 * x * (1 + \text{Tanh}(\sqrt{2 / \pi} * (x + 0.044715 * x^3)))

Parameters
----------
data : Expr
The input data

Returns
-------
result : Expr
The computed result.
"""
return _ffi_api.gelu(data)


def silu(data: Expr) -> Expr:
r"""Sigmoid Linear Unit function

.. math::

\text{SiLU}(x) = x * \text{sigmoid}(x)

Parameters
----------
data : Expr
The input data

Returns
-------
result : Expr
The computed result.
"""
return _ffi_api.silu(data)
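
For orientation, a minimal usage sketch of the two activation operators added above; it assumes the input expression `x` is built elsewhere (how it is constructed depends on the builder/frontend API and is not part of this diff).

from tvm import relax

def apply_activations(x: relax.Expr):
    # `x` is assumed to be an existing float tensor expression (e.g. a function parameter).
    g = relax.op.nn.gelu(x)  # tanh-approximated GELU, as in the formula above
    s = relax.op.nn.silu(x)  # x * sigmoid(x)
    return g, s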


def softmax(data: Expr, axis=-1) -> Expr:
r"""Computes softmax.
@@ -392,3 +432,90 @@ def batch_norm(
return _ffi_api.batch_norm(
data, gamma, beta, moving_mean, moving_var, axis, epsilon, center, scale
)


def dropout(data: Expr, rate: float = 0.5) -> Expr:
"""Applies the dropout operation to the input array.

During training, each element of the input is set to zero with
probability ``rate``. The whole array is rescaled by ``1/(1 - rate)``
to keep the expected sum of the input unchanged.

Parameters
----------
data : relax.Expr
The input data to the operator.

rate : float, default=0.5
The probability for an element to be reset to 0.

Returns
-------
result : relax.Expr
The result of dropout, which is a tuple of two tensors.
The first one is the original tensor and the second one is a
mask tensor (1.0 where an element was kept, 0.0 where it was dropped).
"""
return _ffi_api.dropout(data, rate)
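
A hedged sketch of consuming the tuple result documented above; the pre-built input `x` and the use of relax.TupleGetItem for manual IR construction are assumptions, not part of this diff.

from tvm import relax

def apply_dropout(x: relax.Expr):
    # `x` is assumed to be an existing tensor expression.
    out = relax.op.nn.dropout(x, rate=0.1)
    # `out` is a 2-tuple expression: element 0 is the original tensor,
    # element 1 is the mask tensor described in the docstring above.
    data_out = relax.TupleGetItem(out, 0)
    mask = relax.TupleGetItem(out, 1)
    return data_out, mask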


def layer_norm(
data: Expr,
gamma: Expr,
beta: Expr,
axis: Union[int, List[int]] = -1,
epsilon: float = 1e-5,
center: bool = True,
scale: bool = True,
) -> Expr:
r"""
Layer normalization (Lei Ba et al., 2016).

Applies layer normalization to the n-dimensional input array.
This operator takes an n-dimensional input array and normalizes
the input using the given axis:

.. math::

out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}}
* gamma + beta

Unlike batch normalization, the mean and variance are computed along the
given axis (typically the channel dimension) rather than the batch dimension.
Assume the input has size k on the normalized axis; then both gamma and beta
have shape (k,).

.. note::

This operator can be optimized away for inference.

Parameters
----------
data : relax.Expr
Input to which layer_norm will be applied.

gamma : relax.Expr
The gamma scale factor.

beta : relax.Expr
The beta offset factor.

axis : Union[int, List[int]], default=-1
The axes that should be normalized, typically the axis of the channels.

epsilon : float, default=1e-5
Small float added to variance to avoid dividing by zero.

center : bool, default=True
If True, add offset of beta to the normalized tensor. If False,
beta is ignored.

scale : bool, default=True
If True, multiply by gamma. If False, gamma is not used.

Returns
-------
result : relax.Expr
The normalized data.
"""
if isinstance(axis, int):
axis = [axis]
return _ffi_api.layer_norm(data, gamma, beta, axis, epsilon, center, scale)
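
A minimal sketch of calling the operator over the last axis; the shapes and the prior construction of `x`, `gamma`, and `beta` are illustrative assumptions.

from tvm import relax

def normalize_last_axis(x: relax.Expr, gamma: relax.Expr, beta: relax.Expr) -> relax.Expr:
    # Assumes x has shape (batch, seq, hidden) and gamma/beta have shape (hidden,),
    # all already constructed as relax expressions (e.g. function parameters).
    # Passing axis=[-1] would be equivalent: an int is wrapped into a
    # one-element list before being handed to the FFI.
    return relax.op.nn.layer_norm(x, gamma, beta, axis=-1, epsilon=1e-5)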
15 changes: 15 additions & 0 deletions python/tvm/relax/op/op_attrs.py
@@ -102,3 +102,18 @@ class SqueezeAttrs(Attrs):
@tvm._ffi.register_object("relax.attrs.ConcatenateAttrs")
class ConcatenateAttrs(Attrs):
"""Attributes for concatenate operator"""


@tvm._ffi.register_object("relax.attrs.DropoutAttrs")
class DropoutAttrs(Attrs):
"""Attributes for dropout operator"""


@tvm._ffi.register_object("relax.attrs.LayerNormAttrs")
class LayerNormAttrs(Attrs):
"""Attributes used in layer_norm operator"""


@tvm._ffi.register_object("relax.attrs.ReduceAttrs")
class ReduceAttrs(Attrs):
"""Attributes used in reduction operator"""