apache · manupak · May 9, 2022 · Jan 31, 2022 · Feb 22, 2022 · Mar 25, 2022
diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py
@@ -920,7 +920,7 @@ def callback(
 
         if axis == [1, 2] and params.keepdims:
             weight_scale = 1
-            weight_values = np.ones([out_channels, filter_height, filter_width, in_channels])
+            weight_values = np.ones([out_channels, filter_height, filter_width, 1])
             scale_bias = vela_api.pack_biases(
                 biases=np.zeros(ifm_shape[-1]),
                 ifm_scale=params.ifm.q_params.scale_f32,
@@ -985,7 +985,7 @@ def callback(
             )
         else:
             weight_scale = 1 / (filter_height * filter_width)
-            weight_values = np.ones([out_channels, filter_height, filter_width, in_channels])
+            weight_values = np.ones([out_channels, filter_height, filter_width, 1])
             bias = -1 * int(params.ifm.q_params.zero_point) * filter_height * filter_width
 
             scale_bias = vela_api.pack_biases(

diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py b/python/tvm/relay/backend/contrib/ethosu/tir/convolution.py
@@ -16,8 +16,10 @@
 # under the License.
 # pylint: disable=invalid-name, unused-argument
 """Extract parameters from the convolution operators in TIR."""
+import math
 import tvm
-from ..vela_api import SCALE_BIAS_LENGTH
+from ethosu.vela import api as vapi
+from ..vela_api import SCALE_BIAS_LENGTH, get_accelerator_config
 from .utils import get_outer_loops, get_op_attrs, get_base_address, get_loads, get_stores
 from .dma import get_ifm_params, get_ofm_params
 from .spec import SerialKernel, SerialAddressRange, SerialActivation, Serial2DConvolution
@@ -47,6 +49,8 @@ def get_conv2d_params(stmt, producers_consumers):
         Whether this operator allocates its output.
 
     """
+    accel_config = get_accelerator_config()
+
     attrs, body = get_op_attrs(stmt)
     _, _, _, _, _, inner = get_outer_loops(body, "NHWC")
     rh = inner
@@ -75,17 +79,64 @@ def get_conv2d_params(stmt, producers_consumers):
     # Get scale_bias info
     scale_bias_load = loads[3]
     scale_bias_base = [get_base_address(index) for index in scale_bias_load.indices]
-    serial_scale_bias = SerialAddressRange(
-        address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base),
-        length=SCALE_BIAS_LENGTH * serial_ofm[3],
-    )
     # Get weight info
     weight_load = loads[2]
     weight_base = [get_base_address(index) for index in weight_load.indices]
-    serial_weight = SerialAddressRange(
-        address=tvm.tir.BufferLoad(weight_load.buffer, weight_base),
-        length=serial_ofm[3] * serial_kernel[0] * serial_kernel[1] * rc.extent,
-    )
+    channels = serial_ofm[3] if isinstance(serial_ofm[3], int) else serial_ofm[3].value
+
+    if accel_config == vapi.NpuAccelerator.Ethos_U65_512:
+        scale_bias_length = SCALE_BIAS_LENGTH * math.ceil(channels / 2)
+        scale_bias2_length = SCALE_BIAS_LENGTH * math.floor(channels / 2)
+
+        serial_scale_bias = SerialAddressRange(
+            address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base),
+            length=scale_bias_length,
+        )
+        serial_scale_bias2 = SerialAddressRange(
+            address=tvm.tir.BufferLoad(
+                scale_bias_load.buffer, [scale_bias_base[0] + scale_bias_length]
+            ),
+            length=scale_bias2_length,
+        )
+
+        weight_length = (
+            channels * serial_kernel[0] * serial_kernel[1] * math.ceil(rc.extent.value / 2)
+        )
+        weight2_length = (
+            channels * serial_kernel[0] * serial_kernel[1] * math.floor(rc.extent.value / 2)
+        )
+
+        serial_weight = SerialAddressRange(
+            address=tvm.tir.BufferLoad(weight_load.buffer, weight_base),
+            length=weight_length,
+        )
+        serial_weight2 = SerialAddressRange(
+            address=tvm.tir.BufferLoad(weight_load.buffer, [weight_base[0] + weight_length]),
+            length=weight2_length,
+        )
+    else:
+        scale_bias_length = SCALE_BIAS_LENGTH * channels
+
+        serial_scale_bias = SerialAddressRange(
+            address=tvm.tir.BufferLoad(scale_bias_load.buffer, scale_bias_base),
+            length=scale_bias_length,
+        )
+        # Insert -1s into the spec to denote the absence of the other pointer
+        serial_scale_bias2 = SerialAddressRange(
+            address=tvm.tir.IntImm("int8", -1),
+            length=tvm.tir.IntImm("int8", -1),
+        )
+
+        weight_length = channels * serial_kernel[0] * serial_kernel[1] * rc.extent.value
+
+        serial_weight = SerialAddressRange(
+            address=tvm.tir.BufferLoad(weight_load.buffer, weight_base),
+            length=weight_length,
+        )
+        serial_weight2 = SerialAddressRange(
+            address=tvm.tir.IntImm("int8", -1),
+            length=tvm.tir.IntImm("int8", -1),
+        )
     # Get activation info
     serial_activation = SerialActivation(
         op=attrs["activation"], clip_min=attrs["clip_min"], clip_max=attrs["clip_max"]
@@ -96,8 +147,10 @@ def get_conv2d_params(stmt, producers_consumers):
             ofm=serial_ofm,
             kernel=serial_kernel,
             weight=serial_weight,
+            weight2=serial_weight2,
             weight_zero_point=attrs["weight_zero_point"],
             scale_bias=serial_scale_bias,
+            scale_bias2=serial_scale_bias2,
             padding=serial_padding,
             activation=serial_activation,
             rounding_mode=attrs["rounding_mode"],