[microNPU] Add support for nearest neighbor and bilinear upsampling #9841

Merged 2 commits on Jan 31, 2022
96 changes: 96 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -1269,6 +1269,101 @@ def __call__(self, *args, **kwargs):
pass


class Resize2dRewriter(DFPatternCallback):
"""
Convert an ethos-u.resize2d composite function to an equivalent operation that
performs the relevant upsampling operation.

Case 1: No upsampling (upscale factor of 1):
Identity.
Case 2: Nearest neighbor upsampling:
1x1 pooling with 2x2 nearest neighbor upsampling.
Case 3: Bilinear upsampling:
2x2 average pool with 2x2 nearest neighbor upsampling.
"""

def __init__(self):
super().__init__(require_type=True)
self.pattern = (
wildcard().has_attr({"Composite": ethosu_patterns.Resize2dParams.composite_name})
)(wildcard())

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
params = ethosu_patterns.Resize2dParams(post.op.body)
params.ifm.tensor = post.args[0]

lut = relay.const([], "int8")
ifm_shape = params.ifm.shape
in_channels = ifm_shape[-1]
reduced_op = params.ifm.tensor
current_size = np.array(ifm_shape[1:3])
output_size = np.array(params.size)

if (current_size == output_size).all():
return ethosu_ops.ethosu_identity(
reduced_op,
lut,
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
)

padding = [0, 0, 0, 0]
rounding_mode = "TFL"
pool_shape = [1, 1]
if params.method == "linear":
pool_shape = [2, 2]
rounding_mode = "NATURAL"
if params.coordinate_transformation_mode == "asymmetric":
# Use SAME padding.
ypad = Resize2dRewriter.get_required_padding(ifm_shape[1])
xpad = Resize2dRewriter.get_required_padding(ifm_shape[2])
padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2]

return ethosu_ops.ethosu_pooling(
ifm=reduced_op,
lut=lut,
pooling_type="AVG",
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
pool_shape=pool_shape,
ofm_channels=in_channels,
strides=[1, 1],
padding=padding,
upscale="NEAREST",
rounding_mode=rounding_mode,
)

@staticmethod
def get_required_padding(input_size: int, pool_size: int = 2) -> int:
"""Gets the amount of padding required needed to achieve
'SAME' padding for a given axis."""
needed_input = (input_size - 1) + pool_size
total_padding = max(0, needed_input - input_size)
return total_padding
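
A rough standalone illustration (not part of this diff) of the padding arithmetic used above: for the 2x2 pool that legalizes bilinear resize, 'SAME' padding always works out to one extra row and one extra column, which the callback places on the bottom/right when the coordinate transformation mode is asymmetric.

def get_required_padding(input_size: int, pool_size: int = 2) -> int:
    # Same formula as Resize2dRewriter.get_required_padding: padding needed to
    # keep the output size equal to the input size at stride 1.
    needed_input = (input_size - 1) + pool_size
    return max(0, needed_input - input_size)

# Example with a hypothetical 8x8 IFM and the 2x2 average pool.
ypad = get_required_padding(8)  # 1
xpad = get_required_padding(8)  # 1
# Order is [pad_top, pad_left, pad_bottom, pad_right]; the odd padding element
# lands on the bottom/right, mirroring the callback above.
padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2]
assert padding == [0, 0, 1, 1]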


@ir.transform.module_pass(opt_level=1)
class LegalizeResize2d:
"""This is the pass that wraps Resize2dRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(Resize2dRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


@ir.transform.module_pass(opt_level=1)
class LegalizeEthosU:
"""This is the pass to call graph-rewrites to perform graph transformation
@@ -1299,6 +1394,7 @@ def transform_module(
mod = LegalizeConcat()(mod)
mod = LegalizeSigmoid()(mod)
mod = LegalizeRequantize()(mod)
mod = LegalizeResize2d()(mod)
mod = LegalizeReshape()(mod)
mod = LegalizeStridedSlice()(mod)
mod = LegalizeNoOps()(mod)
10 changes: 8 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
@@ -288,7 +288,10 @@ def match_ethosu_binary_elementwise(output_tensor, device_config):
pad = binary_elementwise.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
@@ -297,7 +300,10 @@ def match_ethosu_binary_elementwise(output_tensor, device_config):
pad2 = binary_elementwise.op.input_tensors[1]
if pad2.op.name != "ethosu_pad":
return None
convert_to_nhwc2 = pad2.op.input_tensors[0]
upscale2 = pad2.op.input_tensors[0]
if upscale2.op.name != "ethosu_upscale":
return None
convert_to_nhwc2 = upscale2.op.input_tensors[0]
if convert_to_nhwc2.op.name != "ethosu_convert_to_nhwc":
return None
read2 = convert_to_nhwc2.op.input_tensors[0]
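
The same check recurs in the conv2d, depthwise, pooling and unary elementwise matchers below: with the new DMA stage, each IFM path is expected to be produced by the chain ethosu_pad <- ethosu_upscale <- ethosu_convert_to_nhwc <- ethosu_read. A minimal sketch of that walk (illustrative only; producer_chain is not a helper from this diff, and it assumes the usual TVM TE behaviour that op.input_tensors is empty for placeholders):

from tvm import te

def producer_chain(tensor: te.Tensor) -> list:
    """Collect producer op names by walking the first input of each compute op."""
    names = [tensor.op.name]
    while tensor.op.input_tensors:
        tensor = tensor.op.input_tensors[0]
        names.append(tensor.op.name)
    return names

# For a matched binary elementwise operation one would expect something like:
# ['ethosu_binary_elementwise', 'ethosu_pad', 'ethosu_upscale',
#  'ethosu_convert_to_nhwc', 'ethosu_read', ...]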
5 changes: 4 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -267,7 +267,10 @@ def match_ethosu_conv2d(output_tensor, device_config):
pad = conv2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
5 changes: 4 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
@@ -267,7 +267,10 @@ def match_ethosu_depthwise_conv2d(output_tensor, device_config):
pad = depthwise2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
38 changes: 37 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/dma.py
@@ -277,13 +277,46 @@ def pad_compute(tensor: te.Tensor, padding: tuple) -> te.Tensor:
)


def upscale_compute(tensor: te.Tensor, upscale_factor: int) -> te.Tensor:
"""Apply upscaling to an NHWC tensor.

Parameters
----------
tensor : te.Tensor
The tensor to upscale.
upscale_factor : int
The factor by which to apply upscaling.

Returns
-------
te.Tensor
The upscaled tensor.

"""
shape = tensor.shape

reason = f"The compiler only supports 2x2 upscaling, but factor was {upscale_factor}."
assert upscale_factor in (1, 2), reason
new_shape = (shape[0], shape[1] * upscale_factor, shape[2] * upscale_factor, shape[3])

upscale_attrs = {"op": "ethosu_upscale"}

return te.compute(
new_shape,
lambda nn, hh, ww, cc: tensor(nn, hh // upscale_factor, ww // upscale_factor, cc),
name="ethosu_upscale",
attrs=upscale_attrs,
)
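
The compute above implements nearest-neighbor replication through integer division of the spatial indices. A small numpy sketch of the same indexing (illustrative only, not part of this diff):

import numpy as np

def upscale_nhwc(x: np.ndarray, factor: int = 2) -> np.ndarray:
    """Replicate each pixel of an NHWC array `factor` times along H and W."""
    n, h, w, c = x.shape
    out = np.empty((n, h * factor, w * factor, c), dtype=x.dtype)
    for hh in range(h * factor):
        for ww in range(w * factor):
            # Same index expression as the te.compute lambda above.
            out[:, hh, ww, :] = x[:, hh // factor, ww // factor, :]
    return out

x = np.arange(4, dtype="int8").reshape(1, 2, 2, 1)
y = upscale_nhwc(x)  # shape (1, 4, 4, 1)
assert (y[0, :, :, 0] == np.repeat(np.repeat(x[0, :, :, 0], 2, 0), 2, 1)).all()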


def dma_ifm_compute(
ifm: te.Tensor,
layout: str,
zero_point: int,
scale: float,
channels: int,
padding: Tuple[int, int, int, int],
upscale_factor: Optional[int] = 1,
) -> te.Tensor:
"""A sequence of compute operators representing the DMA capabilities for an IFM.

@@ -301,6 +334,8 @@ def dma_ifm_compute(
The number of valid channels for the data.
padding : tuple
The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
upscale_factor : Optional[int]
The factor by which to apply upscaling. By default there will be no upscaling.

Returns
-------
@@ -310,7 +345,8 @@
"""
read_ifm = read_compute(ifm, zero_point, scale, layout=layout)
convert_to_nhwc_ifm = convert_to_nhwc_compute(read_ifm, layout, channels)
return pad_compute(convert_to_nhwc_ifm, padding)
upscale_ifm = upscale_compute(convert_to_nhwc_ifm, upscale_factor)
return pad_compute(upscale_ifm, padding)
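
With the upscale stage inserted, the IFM DMA pipeline becomes read -> convert_to_nhwc -> upscale -> pad, so padding is applied to the already-upscaled tensor. A shape-only sketch of how this composes for the bilinear resize case (illustrative; the placeholder shape and quantization values are made up):

from tvm import te
from tvm.relay.backend.contrib.ethosu.te.dma import dma_ifm_compute

ifm = te.placeholder((1, 8, 8, 3), dtype="int8", name="ifm")
# 2x upscale followed by [top, left, bottom, right] = [0, 0, 1, 1] 'SAME'
# padding for a 2x2 average pool, as produced by Resize2dRewriter.
dmaed = dma_ifm_compute(
    ifm, "NHWC", zero_point=0, scale=1.0, channels=3,
    padding=(0, 0, 1, 1), upscale_factor=2,
)
print(dmaed.shape)  # expected: [1, 17, 17, 3] -- 8x8 -> 16x16 -> padded to 17x17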


def dma_ofm_compute(
10 changes: 8 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/pooling.py
@@ -109,9 +109,12 @@ def pooling_compute(
padding = [int(v) for v in padding]
stride_h, stride_w = [int(v) for v in strides]
pool_shape_h, pool_shape_w = [int(v) for v in pool_shape]
upscale_factor = 2 if upscale != "NONE" else 1

# Compute operation for the IFM DMA pipeline
dmaed_ifm = dma_ifm_compute(ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, padding)
dmaed_ifm = dma_ifm_compute(
ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, padding, upscale_factor
)

# Pooling compute operation
ofm_height = (dmaed_ifm.shape[1] - pool_shape_h) // stride_h + 1
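
For the bilinear 2x upsampling case this arithmetic reproduces exactly the resized output size, since dmaed_ifm already includes both the upscaling and the padding. A worked example with made-up sizes (not from the diff):

ifm_height = 8
upscale_factor = 2          # upscale="NEAREST"
pad_top, pad_bottom = 0, 1  # 'SAME' padding for a 2x2 pool
pool_shape_h, stride_h = 2, 1
dmaed_height = ifm_height * upscale_factor + pad_top + pad_bottom  # 17
ofm_height = (dmaed_height - pool_shape_h) // stride_h + 1         # 16
assert ofm_height == ifm_height * upscale_factor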
@@ -228,7 +231,10 @@ def match_ethosu_pooling(output_tensor, device_config):
pad = pool2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py
@@ -205,7 +205,10 @@ def match_ethosu_unary_elementwise(output_tensor, device_config):
pad = unary_elementwise.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
27 changes: 27 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/tir/dma.py
@@ -76,6 +76,31 @@ def _visit(expr):
)


def get_upscale_params(stmt):
"""Get the upscale parameters from a loop nest.

Parameters
----------
stmt : tvm.tir.AttrStmt
The outermost attribute statement of an upscale loop nest.

Returns
-------
input_pointer : tvm.tir.Var
The pointer consumed by the operation.
output_pointer : tvm.tir.Var
The pointer produced by the operation.
"""
_, body = get_op_attrs(stmt)
_, _, _, _, _, inner = get_outer_loops(body, "NHWC")
if isinstance(inner.value, tvm.tir.Call):
input_pointer = inner.value.args[1].buffer_var
else:
input_pointer = inner.value.buffer_var
output_pointer = inner.buffer_var
return (input_pointer, output_pointer)


def get_convert_to_nhwc_params(stmt):
"""Get the true number of channels from a convert_to_nhwc loop nest.

Expand Down Expand Up @@ -264,6 +289,8 @@ def get_ifm_params(pointer, producers):
"""
pad = producers[pointer]
serial_padding, input_pointer, _ = get_pad_params(pad)
upscale = producers[input_pointer]
input_pointer, _ = get_upscale_params(upscale)
convert_to_nhwc = producers[input_pointer]
in_channels, input_pointer, _ = get_convert_to_nhwc_params(convert_to_nhwc)
read = producers[input_pointer]
2 changes: 1 addition & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/pooling.py
@@ -89,7 +89,7 @@ def get_pooling_params(
padding=serial_padding,
activation=serial_activation,
rounding_mode=attrs["rounding_mode"],
upscale="NONE",
upscale=attrs["upscale"],
),
output_pointer,
replace_pointer,
3 changes: 2 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py
@@ -304,7 +304,8 @@ def from_output(cls, out):
convert_to_nhcwb16 = write.op.input_tensors[0]
conv2d = convert_to_nhcwb16.op.input_tensors[0]
pad = conv2d.op.input_tensors[0]
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
convert_to_nhwc = upscale.op.input_tensors[0]
read = convert_to_nhwc.op.input_tensors[0]
return cls(read, convert_to_nhwc, pad, conv2d, convert_to_nhcwb16, write)

@@ -443,7 +443,7 @@ def _create_npu_op_conv2d(
_convert_clip_bounds(npu_conv2d_op)

npu_conv2d_op.rounding_mode = _create_npu_rounding_mode(serial_2d_convolution.rounding_mode)
npu_conv2d_op.upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale)
npu_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale)
accel_config = vela_api.get_accelerator_config()
weights_shape_ohwi = [
npu_conv2d_op.ofm.shape.depth,
Expand Down Expand Up @@ -506,7 +506,7 @@ def _create_npu_op_depthwise_conv2d(serial_2d_depthwise):
npu_depthwise_conv2d_op.rounding_mode = _create_npu_rounding_mode(
serial_2d_depthwise.rounding_mode
)
npu_depthwise_conv2d_op.upscale = _create_npu_resampling_mode(serial_2d_depthwise.upscale)
npu_depthwise_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_depthwise.upscale)
target_accel_config = vela_api.get_accelerator_config()
block_config = vela_api.get_optimal_block_config(npu_depthwise_conv2d_op, target_accel_config)
npu_depthwise_conv2d_op.block_config = block_config
@@ -656,7 +656,7 @@ def _create_npu_resampling_mode(
mode_map = {
"NONE": vapi.NpuResamplingMode.NONE,
"NEAREST": vapi.NpuResamplingMode.NEAREST,
"TRANSPOSE": vapi.NpuResamplingMode.TRANSPOSE,
"ZEROS": vapi.NpuResamplingMode.TRANSPOSE,
}
mode = str(mode.value)
assert mode in mode_map.keys()
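
The map above converts the serialized upscale string into Vela's resampling mode; note that the renamed "ZEROS" entry is mapped onto NpuResamplingMode.TRANSPOSE. A self-contained sketch of the lookup that does not require the Vela package (StubResamplingMode is a made-up stand-in for vapi.NpuResamplingMode):

from enum import Enum, auto

class StubResamplingMode(Enum):
    """Hypothetical stand-in for vapi.NpuResamplingMode."""
    NONE = auto()
    NEAREST = auto()
    TRANSPOSE = auto()

mode_map = {
    "NONE": StubResamplingMode.NONE,
    "NEAREST": StubResamplingMode.NEAREST,  # used by the legalized resize pooling
    "ZEROS": StubResamplingMode.TRANSPOSE,
}
assert mode_map["NEAREST"] is StubResamplingMode.NEAREST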
@@ -737,7 +737,7 @@ def _create_npu_op_pooling(serial_pooling: spec.SerialPooling):
_convert_clip_bounds(npu_pooling_op)

npu_pooling_op.rounding_mode = _create_npu_rounding_mode(serial_pooling.rounding_mode)
npu_pooling_op.upscale = _create_npu_resampling_mode(serial_pooling.upscale)
npu_pooling_op.ifm_upscale = _create_npu_resampling_mode(serial_pooling.upscale)

target_accel_config = vela_api.get_accelerator_config()
block_config = vela_api.get_optimal_block_config(npu_pooling_op, target_accel_config)