apache · masahi · Jul 5, 2021 · Jun 24, 2021 · Jun 25, 2021 · Jun 25, 2021
diff --git a/docs/langref/relay_op.rst b/docs/langref/relay_op.rst
@@ -181,7 +181,9 @@ This level enables additional math and transform operators.
 .. autosummary::
    :nosignatures:
 
-   tvm.relay.image.resize
+   tvm.relay.image.resize1d
+   tvm.relay.image.resize2d
+   tvm.relay.image.resize3d
    tvm.relay.image.crop_and_resize
    tvm.relay.image.dilation2d
    tvm.relay.vision.multibox_prior

diff --git a/include/tvm/relay/attrs/image.h b/include/tvm/relay/attrs/image.h
@@ -32,31 +32,74 @@
 namespace tvm {
 namespace relay {
 
-/*! \brief Attributes used in image resize operator */
-struct ResizeAttrs : public tvm::AttrsNode<ResizeAttrs> {
+/*! \brief Attributes used in image resize1d operator (resize is applied on 'W') */
+struct Resize1DAttrs : public tvm::AttrsNode<Resize1DAttrs> {
   Array<IndexExpr> size;
   std::string layout;
   std::string method;
   std::string coordinate_transformation_mode;
   std::string rounding_method;
-  double bicubic_alpha;
-  int bicubic_exclude;
+  double cubic_alpha;  // spline coefficient for cubic interpolation
+  int cubic_exclude;   // flag: exclude the image exterior during cubic interpolation
   DataType out_dtype;
 
-  TVM_DECLARE_ATTRS(ResizeAttrs, "relay.attrs.ResizeAttrs") {
+  TVM_DECLARE_ATTRS(Resize1DAttrs, "relay.attrs.Resize1DAttrs") {
+    TVM_ATTR_FIELD(size).set_default(NullValue<Array<IndexExpr> >()).describe("Output Size.");
+    TVM_ATTR_FIELD(layout).set_default("NCW").describe(
+        "Dimension ordering of input data. Can be 'NCW', 'NWC', etc. "
+        "'N', 'C', 'W' stands for batch, channel and width "
+        "dimensions respectively. Resize is applied on the "
+        "'W' dimension.");
+    TVM_ATTR_FIELD(method).set_default("linear").describe(
+        "Specify the mode to use for scaling. "
+        "nearest_neighbor - Nearest Neighbor, "
+        "linear - Linear Interpolation, "
+        "cubic - Cubic Interpolation");
+    TVM_ATTR_FIELD(coordinate_transformation_mode)
+        .set_default("half_pixel")
+        .describe(
+            "Describes how to transform the coordinate in the resized tensor "
+            "to the coordinate in the original tensor. "
+            "Refer to the ONNX Resize operator specification for details. "
+            "Available options are half_pixel, align_corners and asymmetric");
+    TVM_ATTR_FIELD(rounding_method)
+        .set_default("round")
+        .describe(
+            "indicates how to find the \"nearest\" pixel in nearest_neighbor method. "
+            "Available options are round, floor, and ceil.");
+    TVM_ATTR_FIELD(cubic_alpha)
+        .set_default(-0.5)
+        .describe("Spline Coefficient for cubic interpolation");
+    TVM_ATTR_FIELD(cubic_exclude)
+        .set_default(0)
+        .describe("Flag to exclude exterior of the image during cubic interpolation");
+    TVM_ATTR_FIELD(out_dtype).set_default(NullValue<DataType>()).describe("Output data type.");
+  }
+};
+
+/*! \brief Attributes used in image resize2d operator */
+struct Resize2DAttrs : public tvm::AttrsNode<Resize2DAttrs> {
+  Array<IndexExpr> size;
+  std::string layout;
+  std::string method;
+  std::string coordinate_transformation_mode;
+  std::string rounding_method;
+  double cubic_alpha;  // spline coefficient for bicubic interpolation
+  int cubic_exclude;   // flag: exclude the image exterior during bicubic interpolation
+  DataType out_dtype;
+
+  TVM_DECLARE_ATTRS(Resize2DAttrs, "relay.attrs.Resize2DAttrs") {
     TVM_ATTR_FIELD(size).set_default(NullValue<Array<IndexExpr> >()).describe("Output Size.");
     TVM_ATTR_FIELD(layout).set_default("NCHW").describe(
         "Dimension ordering of input data. Can be 'NCHW', 'NHWC', etc."
         "'N', 'C', 'H', 'W' stands for batch, channel, height, and width"
         "dimensions respectively. Resize is applied on the 'H' and"
         "'W' dimensions.");
-    TVM_ATTR_FIELD(method)
-        .set_default("bilinear")
-        .describe(
-            "Specify the mode to use for scaling."
-            "nearest_neighbor -  Nearest Neighbor"
-            "bilinear - Bilinear Interpolation"
-            "bicubic - Bicubic Interpolation");
+    TVM_ATTR_FIELD(method).set_default("linear").describe(
+        "Specify the mode to use for scaling. "
+        "nearest_neighbor - Nearest Neighbor, "
+        "linear - Bilinear Interpolation, "
+        "cubic - Bicubic Interpolation");
     TVM_ATTR_FIELD(coordinate_transformation_mode)
         .set_default("half_pixel")
         .describe(
@@ -69,43 +112,57 @@ struct ResizeAttrs : public tvm::AttrsNode<ResizeAttrs> {
         .describe(
             "indicates how to find the \"nearest\" pixel in nearest_neighbor method"
             "Available options are round, floor, and ceil.");
-    TVM_ATTR_FIELD(bicubic_alpha)
+    TVM_ATTR_FIELD(cubic_alpha)
         .set_default(-0.5)
         .describe("Spline Coefficient for Bicubic Interpolation");
-    TVM_ATTR_FIELD(bicubic_exclude)
+    TVM_ATTR_FIELD(cubic_exclude)
         .set_default(0)
         .describe("Flag to exclude exterior of the image during bicubic interpolation");
     TVM_ATTR_FIELD(out_dtype).set_default(NullValue<DataType>()).describe("Output data type.");
   }
 };
 
 /*! \brief Attributes used in image resize3d operator */
-struct Resize3dAttrs : public tvm::AttrsNode<Resize3dAttrs> {
+struct Resize3DAttrs : public tvm::AttrsNode<Resize3DAttrs> {
   Array<IndexExpr> size;
-  String layout;
-  String method;
-  String coordinate_transformation_mode;
+  std::string layout;
+  std::string method;
+  std::string coordinate_transformation_mode;
+  std::string rounding_method;  // how nearest_neighbor picks the "nearest" pixel
+  double cubic_alpha;           // spline coefficient for tricubic interpolation
+  int cubic_exclude;            // flag: exclude the image exterior during tricubic interpolation
   DataType out_dtype;
 
-  TVM_DECLARE_ATTRS(Resize3dAttrs, "relay.attrs.Resize3dAttrs") {
+  TVM_DECLARE_ATTRS(Resize3DAttrs, "relay.attrs.Resize3DAttrs") {
     TVM_ATTR_FIELD(size).set_default(NullValue<Array<IndexExpr> >()).describe("Output Size.");
     TVM_ATTR_FIELD(layout).set_default("NCDHW").describe(
         "Dimension ordering of input data. Can be 'NCDHW', 'NDHWC', etc."
         "'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width"
         "dimensions respectively. Resize3d is applied on the 'D', 'H' and"
         "'W' dimensions.");
-    TVM_ATTR_FIELD(method)
-        .set_default("trilinear")
-        .describe(
-            "Specify the mode to use for scaling."
-            "nearest_neighbor -  Nearest Neighbor"
-            "trilinear - Trilinear Interpolation");
+    TVM_ATTR_FIELD(method).set_default("linear").describe(
+        "Specify the mode to use for scaling. "
+        "nearest_neighbor - Nearest Neighbor, "
+        "linear - Trilinear Interpolation, "
+        "cubic - Tricubic Interpolation");
     TVM_ATTR_FIELD(coordinate_transformation_mode)
         .set_default("half_pixel")
         .describe(
             "Describes how to transform the coordinate in the resized tensor"
             "to the coordinate in the original tensor."
+            " Refer to the ONNX Resize operator specification for details. "
             "Available options are half_pixel, align_corners and asymmetric");
+    TVM_ATTR_FIELD(rounding_method)
+        .set_default("round")
+        .describe(
+            "indicates how to find the \"nearest\" pixel in nearest_neighbor method. "
+            "Available options are round, floor, and ceil.");
+    TVM_ATTR_FIELD(cubic_alpha)
+        .set_default(-0.5)
+        .describe("Spline Coefficient for Tricubic Interpolation");
+    TVM_ATTR_FIELD(cubic_exclude)
+        .set_default(0)
+        .describe("Flag to exclude exterior of the image during tricubic interpolation");
     TVM_ATTR_FIELD(out_dtype).set_default(NullValue<DataType>()).describe("Output data type.");
   }
 };

diff --git a/python/tvm/relay/frontend/keras.py b/python/tvm/relay/frontend/keras.py
@@ -725,6 +725,7 @@ def _convert_upsample3d(inexpr, keras_layer, etab):
     params["scale_h"] = h
     params["scale_w"] = w
     params["layout"] = etab.data_layout
+    params["coordinate_transformation_mode"] = "asymmetric"
     out = _op.nn.upsampling3d(inexpr, **params)
     return out
 

diff --git a/python/tvm/relay/frontend/mxnet.py b/python/tvm/relay/frontend/mxnet.py
@@ -963,7 +963,7 @@ def _mx_resize(inputs, attrs):
     if scale_width is not None:
         width = (scale_width * shape[3]).astype("int32")
     size = (height, width)
-    return _op.image.resize(inputs[0], size, coordinate_transformation_mode="align_corners")
+    return _op.image.resize2d(inputs[0], size, coordinate_transformation_mode="align_corners")
 
 
 def _mx_amp_multicast(inputs, attrs):

diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py
@@ -457,6 +457,7 @@ def _impl_v1(cls, inputs, attr, params):
 
         kernel_type = infer_type(inputs[1])
         kernel_shapes = [get_const_tuple(kernel_type.checked_type.shape)]
+
         if "kernel_shape" not in attr:
             attr["kernel_shape"] = kernel_shapes[0][2:]
 
@@ -1199,7 +1200,13 @@ def _impl_v9(cls, inputs, attr, params):
 
             layout = "NCDHW"
             out = _op.nn.upsampling3d(
-                inputs[0], scale_d, scale_h, scale_w, layout=layout, method=method
+                inputs[0],
+                scale_d,
+                scale_h,
+                scale_w,
+                layout=layout,
+                method=method,
+                coordinate_transformation_mode="asymmetric",
             )
         # in 2d case, use dynamic op
         else:
@@ -2388,31 +2395,41 @@ def _impl_v10(cls, inputs, attr, params):
         if mode == "nearest":
             method = "nearest_neighbor"
         elif mode == "linear":
-            method = "bilinear"
+            method = "linear"
         elif mode == "cubic":
-            method = "bicubic"
+            method = "cubic"
         else:
             raise tvm.error.OpAttributeInvalid(
                 'Value {} in attribute "mode" of operator Resize is not valid.'.format(mode)
             )
 
         scale = inputs[1]
         size = _op.cast(shape_of(inputs[0]), infer_type(scale).checked_type.dtype) * scale
-        layout = "NCHW"  # ONNX assumes NCHW layout
-        out_size = fold_constant(_op.strided_slice(size, [2], [4]))
-        return _op.image.resize(inputs[0], out_size, layout, method, "asymmetric")
+        ndims = len(infer_shape(inputs[0]))
+        out = None
+        if ndims == 3:
+            out_size = fold_constant(_op.strided_slice(size, [2], [3]))
+            out = _op.image.resize1d(inputs[0], out_size, "NCW", method, "asymmetric")
+        elif ndims == 4:
+            out_size = fold_constant(_op.strided_slice(size, [2], [4]))
+            out = _op.image.resize2d(inputs[0], out_size, "NCHW", method, "asymmetric")
+        elif ndims == 5:
+            out_size = fold_constant(_op.strided_slice(size, [2], [5]))
+            out = _op.image.resize3d(inputs[0], out_size, "NCDHW", method, "asymmetric")
+        else:
+            raise NotImplementedError("Resize only supports 3, 4, or 5 dims")
+        return out
 
     @classmethod
     def _impl_v11(cls, inputs, attr, params):
-        layout = "NCHW"  # ONNX assumes NCHW layout
-
+        ndims = len(infer_shape(inputs[0]))
         mode = attr.get("mode").decode("ascii")
         if mode == "nearest":
             method = "nearest_neighbor"
         elif mode == "linear":
-            method = "bilinear"
+            method = "linear"
         elif mode == "cubic":
-            method = "bicubic"
+            method = "cubic"
         else:
             raise tvm.error.OpAttributeInvalid(
                 'Value {} in attribute "mode" of operator Resize is not valid.'.format(mode)
@@ -2434,10 +2451,26 @@ def _impl_v11(cls, inputs, attr, params):
             assert len(scale_shape) != 0, "One of scale or size should be passed."
             size = _op.cast(shape_of(inputs[0]), infer_type(scale).checked_type.dtype) * scale
-        out_size = fold_constant(_op.strided_slice(size, [2], [4]))
+        # out_size keeps only the entries of `size` after the leading N and C dims;
+        out = None
+        if ndims == 3:
+            out_size = fold_constant(_op.strided_slice(size, [2], [3]))
+            out = _op.image.resize1d(
+                inputs[0], out_size, "NCW", method, coord_trans, nearest_mode, alpha, exclude
+            )
+        elif ndims == 4:
+            out_size = fold_constant(_op.strided_slice(size, [2], [4]))
+            out = _op.image.resize2d(
+                inputs[0], out_size, "NCHW", method, coord_trans, nearest_mode, alpha, exclude
+            )
+        elif ndims == 5:
+            out_size = fold_constant(_op.strided_slice(size, [2], [5]))
+            out = _op.image.resize3d(
+                inputs[0], out_size, "NCDHW", method, coord_trans, nearest_mode, alpha, exclude
+            )
+        else:
+            raise NotImplementedError("Resize only supports 3, 4, or 5 dims")
 
-        return _op.image.resize(
-            inputs[0], out_size, layout, method, coord_trans, nearest_mode, alpha, exclude
-        )
+        return out
 
 
 class NonZero(OnnxOpConverter):

diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py
@@ -1798,7 +1798,7 @@ def get_upsample_out_size(self, inputs, method):
                 else:
                     out_size.append(size)
         else:
-            scale_index = 3 if method in ["bilinear", "trilinear"] else 2
+            scale_index = 3 if method == "linear" else 2
             scales = inputs[scale_index]
             assert scales is not None, "neither out size nor scale provided"
             assert isinstance(scales, list)
@@ -1813,7 +1813,7 @@ def upsample(inputs, input_types):
             data = inputs[0]
             out_size = self.get_upsample_out_size(inputs, method)
 
-            if len(inputs) > 2 and method == "bilinear":
+            if len(inputs) > 2 and method == "linear":
                 align_corners = inputs[2]
             else:
                 align_corners = False
@@ -1826,7 +1826,7 @@ def upsample(inputs, input_types):
                 coord_trans = "half_pixel"
 
             def func(x):
-                return _op.image.resize(x, out_size, "NCHW", method, coord_trans)
+                return _op.image.resize2d(x, out_size, "NCHW", method, coord_trans)
 
             if self.is_quantized_tensor(data):
                 # input qparams are manually appended by us
@@ -1845,7 +1845,7 @@ def upsample3d(inputs, input_types):
             data = inputs[0]
             out_size = self.get_upsample_out_size(inputs, method)
 
-            if len(inputs) > 2 and method == "trilinear":
+            if len(inputs) > 2 and method == "linear":
                 align_corners = inputs[2]
             else:
                 align_corners = False
@@ -2195,6 +2195,8 @@ def interpolate(self, inputs, input_types):
         method = inputs[3]
         if method.startswith("nearest"):
             method = "nearest_neighbor"
+        elif method[0:2] == "bi":
+            method = method[2:]
 
         if method == "nearest_neighbor":
             coord_trans = "asymmetric"
@@ -2203,7 +2205,7 @@ def interpolate(self, inputs, input_types):
         else:
             coord_trans = "half_pixel"
 
-        return _op.image.resize(data, out_size, "NCHW", method, coord_trans)
+        return _op.image.resize2d(data, out_size, "NCHW", method, coord_trans)
 
     def numel(self, inputs, input_types):
         return _op.ndarray_size(inputs[0])
@@ -2473,9 +2475,9 @@ def create_convert_map(self):
             "aten::clamp": self.clamp,
             "aten::clamp_": self.clamp,
             "aten::detach": self.identity,
-            "aten::upsample_bilinear2d": self.make_upsample("bilinear"),
+            "aten::upsample_bilinear2d": self.make_upsample("linear"),
             "aten::upsample_nearest2d": self.make_upsample("nearest_neighbor"),
-            "aten::upsample_trilinear3d": self.make_upsample3d("trilinear"),
+            "aten::upsample_trilinear3d": self.make_upsample3d("linear"),
             "aten::upsample_nearest3d": self.make_upsample3d("nearest_neighbor"),
             "aten::expand_as": self.expand_as,
             "aten::lt": self.make_elemwise("less"),

diff --git a/python/tvm/relay/frontend/tensorflow_ops.py b/python/tvm/relay/frontend/tensorflow_ops.py
@@ -1075,7 +1075,7 @@ def _impl(inputs, attr, params, mod):
 
         # Ignore the new attributes from TF2.0, for now.
         return AttrCvt(
-            op_name="resize", ignores=["Tdim", "half_pixel_centers"], extras={"method": method}
+            op_name="resize2d", ignores=["Tdim", "half_pixel_centers"], extras={"method": method}
         )(inputs, attr)
 
     return _impl
@@ -2943,8 +2943,8 @@ def _impl(inputs, attr, params, mod):
     "Relu": AttrCvt("relu"),
     "Relu6": _relu6(),
     "Reshape": _reshape(),
-    "ResizeBicubic": _resize("bilinear"),
-    "ResizeBilinear": _resize("bilinear"),
+    "ResizeBicubic": _resize("cubic"),
+    "ResizeBilinear": _resize("linear"),
     "ResizeNearestNeighbor": _resize("nearest_neighbor"),
     "ReverseV2": _reverse_v2(),
     "RightShift": AttrCvt("right_shift"),