[microNPU] Add support for transpose convolution #9855

Merged: 1 commit, Feb 3, 2022
82 changes: 82 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -353,6 +353,87 @@ def __call__(self, *args, **kwargs):
pass


class Conv2DTransposeRewriter(DFPatternCallback):
"""Convert conv2d_transpose related composite functions into
ethosu_conv2d_transpose operators."""

def __init__(self):
super().__init__(require_type=True)
self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d_transpose"}))(
wildcard()
)

def callback(
self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map
) -> tvm.relay.Expr:
params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
params.ifm.tensor = post.args[0]

ofm_shape = params.ofm.shape
legalize_padding = params.legalize_padding

weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
weights_values = params.weights.values
weights_values_ohwi = np.transpose(
weights_values, weight_to_ohwi_transform_map[str(params.weights.layout)]
)
weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
weights = relay.const(weights_values_ohwi, dtype=params.weights.values.dtype)

bias_values = (
params.biases.tensor.data.asnumpy()
if params.biases
else np.zeros((params.ifm.shape[-1]))
)
scale_bias = vela_api.pack_biases(
biases=bias_values,
ifm_scale=params.ifm.q_params.scale_f32,
ifm_dtype=np.dtype(params.ifm.dtype),
weight_scales=params.weights.q_params.scale_f32,
ofm_scale=params.ofm.q_params.scale_f32,
is_activation_tanh_or_sigmoid=False,
)

reduced_op = ethosu_ops.ethosu_conv2d(
ifm=post.args[0],
weight=weights,
scale_bias=relay.const(scale_bias, "uint8"),
lut=relay.const([], dtype="int8"),
ifm_scale=float(params.ifm.q_params.scale_f32),
ifm_zero_point=int(params.ifm.q_params.zero_point),
weight_zero_point=int(params.weights.q_params.zero_point),
ofm_scale=float(params.ofm.q_params.scale_f32),
ofm_zero_point=int(params.ofm.q_params.zero_point),
kernel_shape=params.kernel_shape,
ofm_channels=int(ofm_shape[-1]),
strides=(1, 1),
padding=legalize_padding,
dilation=params.dilation,
ifm_layout=str(params.ifm.layout),
ofm_layout=str(params.ofm.layout),
upscale="ZEROS",
)

# Remove additional padding by 'cropping' back to expected size
return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)


@ir.transform.module_pass(opt_level=1)
class LegalizeConv2DTranspose:
"""This is the pass that wraps the Conv2DTransposeRewriter"""

def transform_module(
self, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
) -> tvm.ir.IRModule:
for global_var, func in mod.functions.items():
func = rewrite(Conv2DTransposeRewriter(), func)
mod.update_func(global_var, func)
return mod

def __call__(self, *args, **kwargs):
pass


class DepthwiseConv2DRewriter(DFPatternCallback):
"""Convert ethosu.qnn_depthwise_conv2d composite functions to ethosu_depthwise_conv2d
operators"""
@@ -1379,6 +1460,7 @@ def transform_module(
"""
mod = LegalizeSplit()(mod)
mod = LegalizeConv2D()(mod)
mod = LegalizeConv2DTranspose()(mod)
mod = LegalizeDepthwiseConv2D()(mod)
mod = LegalizeMaxPooling()(mod)
mod = LegalizeAvgPooling()(mod)
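There is no dedicated transpose-convolution operator on the NPU, so the rewriter above lowers qnn.conv2d_transpose onto the existing ethosu_conv2d: the IFM is zero-upscaled, the weights are brought from IOHW into the OHWI order the hardware expects and flipped spatially, and the oversized result is cropped back with strided_slice. A minimal NumPy sketch of just the weight preparation step (toy shapes of my own; the transform map and flip mirror the callback):

import numpy as np

# Toy IOHW weights: 4 input channels, 8 output channels, 3x3 kernel.
# (On the NPU the weights are int8; dtype is irrelevant to the layout demo.)
weights_iohw = np.arange(4 * 8 * 3 * 3).reshape(4, 8, 3, 3)

# As in the callback: IOHW -> OHWI, then flip both spatial axes.
weights_ohwi = np.transpose(weights_iohw, [1, 2, 3, 0])
weights_ohwi = np.flip(weights_ohwi, (1, 2))

assert weights_ohwi.shape == (8, 3, 3, 4)  # (O, H, W, I)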
9 changes: 8 additions & 1 deletion python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -115,10 +115,17 @@ def conv2d_compute(
stride_h, stride_w = [int(v) for v in strides]
dilation_h, dilation_w = [int(v) for v in dilation]
ofm_channels, kernel_h, kernel_w, ifm_channels = [int(v) for v in weight.shape]
upscale_factor = 2 if upscale != "NONE" else 1

# Compute operation for the IFM DMA pipeline
dmaed_ifm = dma_ifm_compute(
ifm, ifm_layout, ifm_zero_point, ifm_scale, weight.shape[3], padding
ifm,
ifm_layout,
ifm_zero_point,
ifm_scale,
weight.shape[3],
padding,
upscale_factor,
)

# 2D Convolution compute operation
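The new upscale_factor argument threads the 2x IFM upscaling through the DMA pipeline ahead of the convolution. A back-of-the-envelope sketch of the shape effect (plain Python with my own helper name; dma_ifm_compute itself is a TE compute and is not reproduced here):

def conv_output_size(size, kernel, stride, pad_before, pad_after, upscale_factor=1):
    # Output spatial size of a convolution whose input is first upscaled
    # by upscale_factor (2 whenever upscale != "NONE").
    upscaled = size * upscale_factor
    return (upscaled + pad_before + pad_after - kernel) // stride + 1

# 8x8 IFM, 3x3 kernel, stride 1, legalized padding of (2, 1): 2x upscaling
# yields the 17x17 OFM that a stride-2 transpose convolution would produce.
print(conv_output_size(8, 3, 1, 2, 1, upscale_factor=2))  # -> 17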
2 changes: 1 addition & 1 deletion python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -102,7 +102,7 @@ def get_conv2d_params(stmt, producers, consumers):
padding=serial_padding,
activation=serial_activation,
rounding_mode=attrs["rounding_mode"],
upscale="NONE",
upscale=attrs["upscale"],
),
output_pointer,
replace_pointer,
14 changes: 14 additions & 0 deletions python/tvm/relay/backend/contrib/ethosu/util.py
@@ -47,6 +47,20 @@ class QConv2DArgs(Enum):
WEIGHTS_SCALE = 5


class QConv2DTransposeArgs(Enum):
"""
This is a helper enum to obtain the correct index
of qnn.conv2d_transpose arguments.
"""

IFM = 0
WEIGHTS = 1
IFM_ZERO_POINT = 2
WEIGHTS_ZERO_POINT = 3
IFM_SCALE = 4
WEIGHTS_SCALE = 5


class RequantArgs(Enum):
"""
This is a helper enum to obtain the correct index
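Because the enum values track the positional signature of qnn.conv2d_transpose, argument lookups elsewhere in this PR (e.g. in QnnConv2DTransposeParams below) read by name rather than by bare integer index. A trivial standalone illustration, with a plain list standing in for the args of a call node:

from enum import Enum

class QConv2DTransposeArgs(Enum):
    # Mirrors the qnn.conv2d_transpose argument order shown above.
    IFM = 0
    WEIGHTS = 1
    IFM_ZERO_POINT = 2
    WEIGHTS_ZERO_POINT = 3
    IFM_SCALE = 4
    WEIGHTS_SCALE = 5

# Stand-in for call.args of a qnn.conv2d_transpose node:
args = ["ifm", "weights", "ifm_zp", "weights_zp", "ifm_scale", "weights_scale"]
assert args[QConv2DTransposeArgs.WEIGHTS.value] == "weights"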
152 changes: 152 additions & 0 deletions python/tvm/relay/op/contrib/ethosu.py
@@ -276,6 +276,137 @@ def is_valid(self) -> bool:
return not self.is_depthwise


class QnnConv2DTransposeParams:
"""
This class will parse a Call to an ethos-u.qnn_conv2d_transpose composite
function and extract quantization information of all the associated tensors.
"""

composite_name = "ethos-u.qnn_conv2d_transpose"
# The NPU only supports padding up to the following maximums
padding_bounds = [31, 31, 32, 32]

@requires_vela
def __init__(self, func_body: tvm.relay.Function):
from tvm.relay.backend.contrib.ethosu.util import QConv2DTransposeArgs # type: ignore
from tvm.relay.backend.contrib.ethosu.util import BiasAddArgs
from tvm.relay.backend.contrib.ethosu.util import RequantArgs

requantize = func_body
call = func_body.args[0]
if str(call.op) == "nn.bias_add":
bias_add = call
call = call.args[0]
else:
bias_add = None
qnn_conv2d_transpose = call

data_layout = qnn_conv2d_transpose.attrs.data_layout
self.kernel_layout = qnn_conv2d_transpose.attrs.kernel_layout

self.weights = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS.value],
self.kernel_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.WEIGHTS_ZERO_POINT.value],
)
self.biases = (
TensorParams(
bias_add.args[BiasAddArgs.BIASES.value],
data_layout,
requantize.args[RequantArgs.IFM_SCALE.value],
requantize.args[RequantArgs.IFM_ZERO_POINT.value],
)
if bias_add
else None
)
self.ifm = TensorParams(
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM.value],
data_layout,
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_SCALE.value],
qnn_conv2d_transpose.args[QConv2DTransposeArgs.IFM_ZERO_POINT.value],
)
self.ofm = TensorParams(
func_body,
data_layout,
requantize.args[RequantArgs.OFM_SCALE.value],
requantize.args[RequantArgs.OFM_ZERO_POINT.value],
)

attrs = qnn_conv2d_transpose.attrs
self.strides = attrs.strides
self.dilation = attrs.dilation
self.padding = attrs.padding
self.channels = attrs.channels
self.groups = attrs.groups
self.output_padding = attrs.output_padding

kernel_size_map = {
"IOHW": self.weights.shape[2:4],
}
self.kernel_shape = kernel_size_map[str(self.weights.layout)]

# Different padding is used in the legalization from conv2d_transpose
# to conv2d, so it is calculated here to check that the new size fits
# within the bounds of the NPU before offloading.
pad_top = int(self.kernel_shape[0]) - 1 - int(self.padding[0])
pad_left = int(self.kernel_shape[1]) - 1 - int(self.padding[1])
pad_bottom = int(self.kernel_shape[0]) - 1 - int(self.padding[2])
pad_right = int(self.kernel_shape[1]) - 1 - int(self.padding[3])
if self.strides == [2, 2]:
pad_bottom -= 1
pad_right -= 1
self.legalize_padding = [pad_top, pad_left, pad_bottom, pad_right]

def is_valid(self) -> bool:
"""
This function checks whether QnnConv2DTranspose has compatible attributes with the NPU
"""

def check_compatible_output_size(ifm_shape, ofm_shape, padding, strides, kernel_shape):
is_valid_padding = padding == [0, 0, 0, 0]
if is_valid_padding:
expected_height = ifm_shape[1] * strides[0] + (kernel_shape[0] - strides[0])
expected_width = ifm_shape[2] * strides[1] + (kernel_shape[1] - strides[1])
else:
expected_height = ifm_shape[1] * strides[0]
expected_width = ifm_shape[2] * strides[1]
return ofm_shape[1] == expected_height and ofm_shape[2] == expected_width

tensor_params = [self.weights, self.ifm, self.ofm]
if not check_valid_dtypes(tensor_params, supported_dtypes=[np.int8]):
return False
if not check_weights(self.weights, self.dilation):
return False
if self.biases and not check_bias(self.biases):
return False
if not check_strides(self.strides, stride_range=(2, 2)):
return False
if not check_batch_size(self.ifm):
return False
if not check_dilation(self.dilation, dilation_range=(1, 1)):
return False
if not check_compatible_output_size(
self.ifm.shape,
self.ofm.shape,
[int(x) for x in self.padding],
self.strides,
self.kernel_shape,
):
return False
if not check_padding(self.legalize_padding, self.padding_bounds):
return False
if self.kernel_shape[0] - 2 - int(self.padding[2]) < 0:
return False
if self.kernel_shape[1] - 2 - int(self.padding[3]) < 0:
return False
if self.groups != 1:
return False
if list(self.output_padding) != [0, 0]:
return False
return True


class QnnDepthwiseConv2DParams(QnnConv2DParams):
"""
This class will parse a call to an ethosu.depthwise_conv2d composite function
@@ -348,6 +479,22 @@ def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
return clip_or_req


def qnn_conv2d_transpose_pattern() -> tvm.relay.dataflow_pattern.DFPattern:
"""
This function creates the pattern for qnn.conv2d_transpose.
"""
qnn_conv2d_transpose = is_op("qnn.conv2d_transpose")(
wildcard(), is_constant(), is_constant(), is_constant(), is_constant(), is_constant()
).has_attr({"kernel_layout": "IOHW"})
optional_bias_add = (
is_op("nn.bias_add")(qnn_conv2d_transpose, is_constant()) | qnn_conv2d_transpose
)
req = is_op("qnn.requantize")(
optional_bias_add, is_constant(), is_constant(), is_constant(), is_constant()
)
return req


class MaxPool2DParams:
"""
This class will parse a call to an ethos-u.maxpool2d composite function
@@ -1299,6 +1446,11 @@ def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Cal
qnn_depthwise_conv2d_pattern(),
lambda pat: QnnDepthwiseConv2DParams(pat).is_valid(),
),
(
QnnConv2DTransposeParams.composite_name,
qnn_conv2d_transpose_pattern(),
lambda pat: QnnConv2DTransposeParams(pat).is_valid(),
),
(
MaxPool2DParams.composite_name,
qnn_maxpool2d_pattern(),
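The legalize_padding arithmetic above is the crux of the lowering: a stride-2 transpose convolution with padding p and kernel size k becomes a stride-1 convolution over the zero-upscaled IFM with padding k - 1 - p, less one on the bottom and right because the final inserted zero row/column is cropped away. A small standalone check of those numbers, mirroring the code above (the shapes are my own example, not taken from the PR's tests):

def legalize_padding(kernel_shape, padding, strides):
    # Mirrors QnnConv2DTransposeParams; padding is (top, left, bottom, right).
    pad_top = kernel_shape[0] - 1 - padding[0]
    pad_left = kernel_shape[1] - 1 - padding[1]
    pad_bottom = kernel_shape[0] - 1 - padding[2]
    pad_right = kernel_shape[1] - 1 - padding[3]
    if list(strides) == [2, 2]:
        pad_bottom -= 1
        pad_right -= 1
    return [pad_top, pad_left, pad_bottom, pad_right]

def expected_output_size(ifm_hw, kernel_shape, padding, strides):
    # Matches check_compatible_output_size above (zero-padding case).
    if list(padding) == [0, 0, 0, 0]:
        return (ifm_hw[0] * strides[0] + (kernel_shape[0] - strides[0]),
                ifm_hw[1] * strides[1] + (kernel_shape[1] - strides[1]))
    return (ifm_hw[0] * strides[0], ifm_hw[1] * strides[1])

# 3x3 kernel, stride 2, zero padding: an 8x8 IFM yields a 17x17 OFM, and the
# legalized conv2d pads the 16x16 upscaled IFM by (2, 2, 1, 1).
print(legalize_padding((3, 3), [0, 0, 0, 0], [2, 2]))              # [2, 2, 1, 1]
print(expected_output_size((8, 8), (3, 3), [0, 0, 0, 0], [2, 2]))  # (17, 17)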
17 changes: 16 additions & 1 deletion src/relay/op/contrib/ethosu/convolution.cc
@@ -154,6 +154,15 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
return false;
}

const std::unordered_set<std::string> upscale_methods = {"NONE", "ZEROS", "NEAREST"};
if (upscale_methods.find(param->upscale) == upscale_methods.end()) {
reporter->GetDiagCtx().EmitFatal(Diagnostic::Error(reporter->GetSpan())
<< "Invalid operator: Expected upsample method to be 'NONE', "
"'ZEROS' or 'NEAREST' but got "
<< param->upscale);
return false;
}

// The scale_bias should be provided as a tensor of size {ofm_channels, 10}
reporter->Assign(types[2], TensorType({weight->shape[0], 10}, DataType::UInt(8)));

@@ -162,10 +171,16 @@ bool EthosuConv2DRel(const Array<Type>& types, int num_inputs, const Attrs& attr
param->kernel_shape[1], weight->shape[3]},
weight->dtype));

Array<IndexExpr> ifm_shape = ifm->shape;
if (param->upscale != "NONE") {
ifm_shape = EthosuInferUpscaledInput(ifm_shape, param->ifm_layout);
}

// Assign ofm type
auto ofm_shape =
EthosuInferKernelOutput(ifm->shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
EthosuInferKernelOutput(ifm_shape, param->ifm_layout, param->ofm_layout, param->kernel_shape,
param->ofm_channels, param->dilation, param->strides, param->padding);

reporter->Assign(types[4], TensorType(ofm_shape, ifm->dtype));
return true;
}
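For reference, the three accepted upscale methods differ in what gets inserted between IFM elements. My own NumPy illustration of 2x upscaling along one axis follows (the hardware applies this to both H and W; the semantics here are an assumption based on how the legalization uses "ZEROS" for transpose convolution):

import numpy as np

row = np.array([1, 2, 3], dtype=np.int8)

# "ZEROS": interleave zeros between elements (what conv2d_transpose needs).
upscaled_zeros = np.stack([row, np.zeros_like(row)], axis=-1).reshape(-1)

# "NEAREST": repeat each element (nearest-neighbour upscaling).
upscaled_nearest = np.repeat(row, 2)

print(upscaled_zeros)    # [1 0 2 0 3 0]
print(upscaled_nearest)  # [1 1 2 2 3 3]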
3 changes: 2 additions & 1 deletion tests/python/contrib/test_ethosu/infra.py
@@ -423,6 +423,7 @@ def make_ethosu_conv2d(
weight_dtype="int8",
scale_bias_dtype="uint8",
rounding_mode="TFL",
upscale="NONE",
):
# conv params
weight_shape = (ofm_channels, kernel_shape[0], kernel_shape[1], ifm_channels)
@@ -451,7 +452,7 @@
clip_min=10 if activation == "CLIP" else 0,
clip_max=100 if activation == "CLIP" else 0,
rounding_mode=rounding_mode,
upscale="NONE",
upscale=upscale,
ifm_layout=ifm_layout,
ofm_layout=ofm_layout,
)