[microNPU] Add support for nearest neighbor and bilinear upsampling #9841

Merged 2 commits on Jan 31, 2022
96 changes: 96 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -1269,6 +1269,101 @@ def __call__(self, *args, **kwargs):
pass


class Resize2dRewriter(DFPatternCallback):
"""
Convert an ethos-u.resize2d composite function to an equivalent operation that
performs the relevant upsampling operation.

Case 1: No upsampling (upscale factor of 1):
Identity.
Case 2: Nearest neighbor upsampling:
1x1 pooling with 2x2 nearest neighbor upsampling.
Case 3: Bilinear upsampling:
2x2 average pool with 2x2 nearest neighbor upsampling.
"""

def __init__(self):
super().__init__(require_type=True)
self.pattern = (
wildcard().has_attr({"Composite": ethosu_patterns.Resize2dParams.composite_name})
)(wildcard())

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
params = ethosu_patterns.Resize2dParams(post.op.body)
params.ifm.tensor = post.args[0]

lut = relay.const([], "int8")
ifm_shape = params.ifm.shape
in_channels = ifm_shape[-1]
reduced_op = params.ifm.tensor
current_size = np.array(ifm_shape[1:3])
output_size = np.array(params.size)

if (current_size == output_size).all():
return ethosu_ops.ethosu_identity(
reduced_op,
lut,
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
)

padding = [0, 0, 0, 0]
rounding_mode = "TFL"
pool_shape = [1, 1]
if params.method == "linear":
pool_shape = [2, 2]
rounding_mode = "NATURAL"
if params.coordinate_transformation_mode == "asymmetric":
# Use SAME padding.
ypad = Resize2dRewriter.get_required_padding(ifm_shape[1])
xpad = Resize2dRewriter.get_required_padding(ifm_shape[2])
padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2]

return ethosu_ops.ethosu_pooling(
ifm=reduced_op,
lut=lut,
pooling_type="AVG",
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
pool_shape=pool_shape,
ofm_channels=in_channels,
strides=[1, 1],
padding=padding,
upscale="NEAREST",
rounding_mode=rounding_mode,
)

@staticmethod
def get_required_padding(input_size: int, pool_size: int = 2) -> int:
"""Gets the amount of padding required needed to achieve
'SAME' padding for a given axis."""
needed_input = (input_size - 1) + pool_size
total_padding = max(0, needed_input - input_size)
return total_padding
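
A rough standalone illustration (not part of this diff) of the padding arithmetic used above: for the 2x2 pool that legalizes bilinear resize, 'SAME' padding always works out to one extra row and one extra column, which the callback places on the bottom/right when the coordinate transformation mode is asymmetric.

def get_required_padding(input_size: int, pool_size: int = 2) -> int:
    # Same formula as Resize2dRewriter.get_required_padding: padding needed to
    # keep the output size equal to the input size at stride 1.
    needed_input = (input_size - 1) + pool_size
    return max(0, needed_input - input_size)

# Example with a hypothetical 8x8 IFM and the 2x2 average pool.
ypad = get_required_padding(8)  # 1
xpad = get_required_padding(8)  # 1
# Order is [pad_top, pad_left, pad_bottom, pad_right]; the odd padding element
# lands on the bottom/right, mirroring the callback above.
padding = [ypad // 2, xpad // 2, (ypad + 1) // 2, (xpad + 1) // 2]
assert padding == [0, 0, 1, 1]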


@ir.transform.module_pass(opt_level=1)
class LegalizeResize2d:
"""This is the pass that wraps Resize2dRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(Resize2dRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


@ir.transform.module_pass(opt_level=1)
class LegalizeEthosU:
"""This is the pass to call graph-rewrites to perform graph transformation
@@ -1299,6 +1394,7 @@ def transform_module(
mod = LegalizeConcat()(mod)
mod = LegalizeSigmoid()(mod)
mod = LegalizeRequantize()(mod)
mod = LegalizeResize2d()(mod)
mod = LegalizeReshape()(mod)
mod = LegalizeStridedSlice()(mod)
mod = LegalizeNoOps()(mod)
10 changes: 8 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
@@ -288,7 +288,10 @@ def match_ethosu_binary_elementwise(output_tensor, device_config):
pad = binary_elementwise.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
@@ -297,7 +300,10 @@ def match_ethosu_binary_elementwise(output_tensor, device_config):
pad2 = binary_elementwise.op.input_tensors[1]
if pad2.op.name != "ethosu_pad":
return None
convert_to_nhwc2 = pad2.op.input_tensors[0]
upscale2 = pad2.op.input_tensors[0]
if upscale2.op.name != "ethosu_upscale":
return None
convert_to_nhwc2 = upscale2.op.input_tensors[0]
if convert_to_nhwc2.op.name != "ethosu_convert_to_nhwc":
return None
read2 = convert_to_nhwc2.op.input_tensors[0]
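
The same check recurs in the conv2d, depthwise, pooling and unary elementwise matchers below: with the new DMA stage, each IFM path is expected to be produced by the chain ethosu_pad <- ethosu_upscale <- ethosu_convert_to_nhwc <- ethosu_read. A minimal sketch of that walk (illustrative only; producer_chain is not a helper from this diff, and it assumes the usual TVM TE behaviour that op.input_tensors is empty for placeholders):

from tvm import te

def producer_chain(tensor: te.Tensor) -> list:
    """Collect producer op names by walking the first input of each compute op."""
    names = [tensor.op.name]
    while tensor.op.input_tensors:
        tensor = tensor.op.input_tensors[0]
        names.append(tensor.op.name)
    return names

# For a matched binary elementwise operation one would expect something like:
# ['ethosu_binary_elementwise', 'ethosu_pad', 'ethosu_upscale',
#  'ethosu_convert_to_nhwc', 'ethosu_read', ...]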
5 changes: 4 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -267,7 +267,10 @@ def match_ethosu_conv2d(output_tensor, device_config):
pad = conv2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
5 changes: 4 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
@@ -267,7 +267,10 @@ def match_ethosu_depthwise_conv2d(output_tensor, device_config):
pad = depthwise2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
38 changes: 37 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/dma.py
@@ -277,13 +277,46 @@ def pad_compute(tensor: te.Tensor, padding: tuple) -> te.Tensor:
)


def upscale_compute(tensor: te.Tensor, upscale_factor: int) -> te.Tensor:
"""Apply upscaling to an NHWC tensor.

Parameters
----------
tensor : te.Tensor
The tensor to upscale.
upscale_factor : int
The factor by which to apply upscaling.

Returns
-------
te.Tensor
The upscaled tensor.

"""
shape = tensor.shape

reason = f"The compiler only supports 2x2 upscaling, but factor was {upscale_factor}."
assert upscale_factor in (1, 2), reason
new_shape = (shape[0], shape[1] * upscale_factor, shape[2] * upscale_factor, shape[3])

upscale_attrs = {"op": "ethosu_upscale"}

return te.compute(
new_shape,
lambda nn, hh, ww, cc: tensor(nn, hh // upscale_factor, ww // upscale_factor, cc),
name="ethosu_upscale",
attrs=upscale_attrs,
)
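
The compute above implements nearest-neighbor replication through integer division of the spatial indices. A small numpy sketch of the same indexing (illustrative only, not part of this diff):

import numpy as np

def upscale_nhwc(x: np.ndarray, factor: int = 2) -> np.ndarray:
    """Replicate each pixel of an NHWC array `factor` times along H and W."""
    n, h, w, c = x.shape
    out = np.empty((n, h * factor, w * factor, c), dtype=x.dtype)
    for hh in range(h * factor):
        for ww in range(w * factor):
            # Same index expression as the te.compute lambda above.
            out[:, hh, ww, :] = x[:, hh // factor, ww // factor, :]
    return out

x = np.arange(4, dtype="int8").reshape(1, 2, 2, 1)
y = upscale_nhwc(x)  # shape (1, 4, 4, 1)
assert (y[0, :, :, 0] == np.repeat(np.repeat(x[0, :, :, 0], 2, 0), 2, 1)).all()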


def dma_ifm_compute(
ifm: te.Tensor,
layout: str,
zero_point: int,
scale: float,
channels: int,
padding: Tuple[int, int, int, int],
upscale_factor: Optional[int] = 1,
) -> te.Tensor:
"""A sequence of compute operators representing the DMA capabilities for an IFM.

@@ -301,6 +334,8 @@ def dma_ifm_compute(
The number of valid channels for the data.
padding : tuple
The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
upscale_factor : Optional[int]
The factor by which to apply upscaling. By default there will be no upscaling.

Returns
-------
@@ -310,7 +345,8 @@
"""
read_ifm = read_compute(ifm, zero_point, scale, layout=layout)
convert_to_nhwc_ifm = convert_to_nhwc_compute(read_ifm, layout, channels)
return pad_compute(convert_to_nhwc_ifm, padding)
upscale_ifm = upscale_compute(convert_to_nhwc_ifm, upscale_factor)
return pad_compute(upscale_ifm, padding)
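
With the upscale stage inserted, the IFM DMA pipeline becomes read -> convert_to_nhwc -> upscale -> pad, so padding is applied to the already-upscaled tensor. A shape-only sketch of how this composes for the bilinear resize case (illustrative; the placeholder shape and quantization values are made up):

from tvm import te
from tvm.relay.backend.contrib.ethosu.te.dma import dma_ifm_compute

ifm = te.placeholder((1, 8, 8, 3), dtype="int8", name="ifm")
# 2x upscale followed by [top, left, bottom, right] = [0, 0, 1, 1] 'SAME'
# padding for a 2x2 average pool, as produced by Resize2dRewriter.
dmaed = dma_ifm_compute(
    ifm, "NHWC", zero_point=0, scale=1.0, channels=3,
    padding=(0, 0, 1, 1), upscale_factor=2,
)
print(dmaed.shape)  # expected: [1, 17, 17, 3] -- 8x8 -> 16x16 -> padded to 17x17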


def dma_ofm_compute(
10 changes: 8 additions & 2 deletions python/tvm/relay/backend/contrib/ethosu/te/pooling.py
@@ -109,9 +109,12 @@ def pooling_compute(
padding = [int(v) for v in padding]
stride_h, stride_w = [int(v) for v in strides]
pool_shape_h, pool_shape_w = [int(v) for v in pool_shape]
upscale_factor = 2 if upscale != "NONE" else 1

# Compute operation for the IFM DMA pipeline
dmaed_ifm = dma_ifm_compute(ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, padding)
dmaed_ifm = dma_ifm_compute(
ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, padding, upscale_factor
)

# Pooling compute operation
ofm_height = (dmaed_ifm.shape[1] - pool_shape_h) // stride_h + 1
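
For the bilinear 2x upsampling case this arithmetic reproduces exactly the resized output size, since dmaed_ifm already includes both the upscaling and the padding. A worked example with made-up sizes (not from the diff):

ifm_height = 8
upscale_factor = 2          # upscale="NEAREST"
pad_top, pad_bottom = 0, 1  # 'SAME' padding for a 2x2 pool
pool_shape_h, stride_h = 2, 1
dmaed_height = ifm_height * upscale_factor + pad_top + pad_bottom  # 17
ofm_height = (dmaed_height - pool_shape_h) // stride_h + 1         # 16
assert ofm_height == ifm_height * upscale_factor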
@@ -228,7 +231,10 @@ def match_ethosu_pooling(output_tensor, device_config):
pad = pool2d.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py
@@ -205,7 +205,10 @@ def match_ethosu_unary_elementwise(output_tensor, device_config):
pad = unary_elementwise.op.input_tensors[0]
if pad.op.name != "ethosu_pad":
return None
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
if upscale.op.name != "ethosu_upscale":
return None
convert_to_nhwc = upscale.op.input_tensors[0]
if convert_to_nhwc.op.name != "ethosu_convert_to_nhwc":
return None
read = convert_to_nhwc.op.input_tensors[0]
27 changes: 27 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/tir/dma.py
@@ -76,6 +76,31 @@ def _visit(expr):
)


def get_upscale_params(stmt):
"""Get the upscale parameters from a loop nest.

Parameters
----------
stmt : tvm.tir.AttrStmt
The outermost attribute statement of an upscale loop nest.

Returns
-------
input_pointer : tvm.tir.Var
The pointer consumed by the operation.
output_pointer : tvm.tir.Var
The pointer produced by the operation.
"""
_, body = get_op_attrs(stmt)
_, _, _, _, _, inner = get_outer_loops(body, "NHWC")
if isinstance(inner.value, tvm.tir.Call):
input_pointer = inner.value.args[1].buffer_var
else:
input_pointer = inner.value.buffer_var
output_pointer = inner.buffer_var
return (input_pointer, output_pointer)


def get_convert_to_nhwc_params(stmt):
"""Get the true number of channels from a convert_to_nhwc loop nest.

Expand Down Expand Up @@ -264,6 +289,8 @@ def get_ifm_params(pointer, producers):
"""
pad = producers[pointer]
serial_padding, input_pointer, _ = get_pad_params(pad)
upscale = producers[input_pointer]
input_pointer, _ = get_upscale_params(upscale)
convert_to_nhwc = producers[input_pointer]
in_channels, input_pointer, _ = get_convert_to_nhwc_params(convert_to_nhwc)
read = producers[input_pointer]
2 changes: 1 addition & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/pooling.py
@@ -89,7 +89,7 @@ def get_pooling_params(
padding=serial_padding,
activation=serial_activation,
rounding_mode=attrs["rounding_mode"],
upscale="NONE",
upscale=attrs["upscale"],
),
output_pointer,
replace_pointer,
3 changes: 2 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/scheduler.py
@@ -304,7 +304,8 @@ def from_output(cls, out):
convert_to_nhcwb16 = write.op.input_tensors[0]
conv2d = convert_to_nhcwb16.op.input_tensors[0]
pad = conv2d.op.input_tensors[0]
convert_to_nhwc = pad.op.input_tensors[0]
upscale = pad.op.input_tensors[0]
convert_to_nhwc = upscale.op.input_tensors[0]
read = convert_to_nhwc.op.input_tensors[0]
return cls(read, convert_to_nhwc, pad, conv2d, convert_to_nhcwb16, write)

@@ -443,7 +443,7 @@ def _create_npu_op_conv2d(
_convert_clip_bounds(npu_conv2d_op)

npu_conv2d_op.rounding_mode = _create_npu_rounding_mode(serial_2d_convolution.rounding_mode)
npu_conv2d_op.upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale)
npu_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_convolution.upscale)
accel_config = vela_api.get_accelerator_config()
weights_shape_ohwi = [
npu_conv2d_op.ofm.shape.depth,
Expand Down Expand Up @@ -506,7 +506,7 @@ def _create_npu_op_depthwise_conv2d(serial_2d_depthwise):
npu_depthwise_conv2d_op.rounding_mode = _create_npu_rounding_mode(
serial_2d_depthwise.rounding_mode
)
npu_depthwise_conv2d_op.upscale = _create_npu_resampling_mode(serial_2d_depthwise.upscale)
npu_depthwise_conv2d_op.ifm_upscale = _create_npu_resampling_mode(serial_2d_depthwise.upscale)
target_accel_config = vela_api.get_accelerator_config()
block_config = vela_api.get_optimal_block_config(npu_depthwise_conv2d_op, target_accel_config)
npu_depthwise_conv2d_op.block_config = block_config
@@ -656,7 +656,7 @@ def _create_npu_resampling_mode(
mode_map = {
"NONE": vapi.NpuResamplingMode.NONE,
"NEAREST": vapi.NpuResamplingMode.NEAREST,
"TRANSPOSE": vapi.NpuResamplingMode.TRANSPOSE,
"ZEROS": vapi.NpuResamplingMode.TRANSPOSE,
}
mode = str(mode.value)
assert mode in mode_map.keys()
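
The map above converts the serialized upscale string into Vela's resampling mode; note that the renamed "ZEROS" entry is mapped onto NpuResamplingMode.TRANSPOSE. A self-contained sketch of the lookup that does not require the Vela package (StubResamplingMode is a made-up stand-in for vapi.NpuResamplingMode):

from enum import Enum, auto

class StubResamplingMode(Enum):
    """Hypothetical stand-in for vapi.NpuResamplingMode."""
    NONE = auto()
    NEAREST = auto()
    TRANSPOSE = auto()

mode_map = {
    "NONE": StubResamplingMode.NONE,
    "NEAREST": StubResamplingMode.NEAREST,  # used by the legalized resize pooling
    "ZEROS": StubResamplingMode.TRANSPOSE,
}
assert mode_map["NEAREST"] is StubResamplingMode.NEAREST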
@@ -737,7 +737,7 @@ def _create_npu_op_pooling(serial_pooling: spec.SerialPooling):
_convert_clip_bounds(npu_pooling_op)

npu_pooling_op.rounding_mode = _create_npu_rounding_mode(serial_pooling.rounding_mode)
npu_pooling_op.upscale = _create_npu_resampling_mode(serial_pooling.upscale)
npu_pooling_op.ifm_upscale = _create_npu_resampling_mode(serial_pooling.upscale)

target_accel_config = vela_api.get_accelerator_config()
block_config = vela_api.get_optimal_block_config(npu_pooling_op, target_accel_config)