Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Hexagon] Implement avg_pool2d slice op #11417

Merged
merged 13 commits into from
Jun 15, 2022
22 changes: 22 additions & 0 deletions python/tvm/topi/hexagon/slice_ops/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

""" Computes and Schedules for Hexagon slice ops. """

# pylint: disable=wildcard-import

from .avg_pool2d import avg_pool2d_compute, avg_pool2d_STIR_schedule
141 changes: 141 additions & 0 deletions python/tvm/topi/hexagon/slice_ops/avg_pool2d.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name, unused-variable, unused-argument, too-many-locals

""" Compute and schedule for avg_pool2d slice op

Please note the following assumptions made by the implementation:

1) The input must be padded in advance to account for 'padding'. In addition,
both input and output must be padded as per the physical buffer layout.
2) The current implementation assumes 'count_include_pad' to be 'True'. It can be
modified to support the 'False' case, but the element count for the pooling window
must then be pre-computed and provided as an input to reduce the run-time overhead.
3) 'padding' is ignored. It must be handled outside of the sliced op.
4) Please note that this implementation will not work if the output includes any
physical-layout-related padding, as that can result in out-of-bounds accesses
to the input.
"""

from tvm import te
from tvm import tir
from ..utils import get_layout_transform_fn


def validate_out_shape(out_shape, in_shape, kernel, stride, dilation):
    """Check that the input is large enough for the requested output shape.

    Both shapes are unpacked as 4-element sequences of which only the two
    middle entries (height, width) are used. ``kernel``, ``stride`` and
    ``dilation`` are ``(height, width)`` pairs.

    Raises
    ------
    RuntimeError
        If the input height (checked first) or the input width is smaller
        than the extent reached by the last pooling window.
    """
    _, out_h, out_w, _ = out_shape
    _, in_h, in_w, _ = in_shape
    kernel_h, kernel_w = kernel
    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation

    # Extent reached by the last window: start of the last window plus the
    # (dilated) span of one kernel.
    required_h = (out_h - 1) * stride_h + dilation_h * (kernel_h - 1) + 1
    if in_h < required_h:
        raise RuntimeError("Output height is too large")

    required_w = (out_w - 1) * stride_w + dilation_w * (kernel_w - 1) + 1
    if in_w < required_w:
        raise RuntimeError("Output width is too large")


def avg_pool2d_compute(A, out_shape, kernel, stride, dilation):
    """Build the TE compute for the avg_pool2d slice op.

    ``A`` is indexed as ``A[b, h, w, c]``. A first stage named "sum"
    accumulates every pooling window in float32; a second stage named "avg"
    multiplies by ``1 / (kh * kw)`` and casts back to ``A.dtype``. The divisor
    is always the full window area.

    Parameters
    ----------
    A
        Input tensor.
    out_shape
        4-element output shape.
    kernel, stride, dilation
        ``(height, width)`` pairs.

    Returns
    -------
    The tensor produced by the "avg" stage.
    """
    kernel_h, kernel_w = kernel
    red_h = te.reduce_axis((0, kernel_h), name="rh")
    red_w = te.reduce_axis((0, kernel_w), name="rw")

    out_batch, _out_h, _out_w, _out_c = out_shape
    # The shape check is only run when the leading output dim is a Python int.
    if isinstance(out_batch, int):
        validate_out_shape(out_shape, A.shape, kernel, stride, dilation)

    stride_h, stride_w = stride
    dilation_h, dilation_w = dilation
    inv_area = 1.0 / (kernel_h * kernel_w)

    # Parameter names b, h, w, c are kept as in the original lambdas; TE
    # derives the axis names of the stage from them.
    def window_sum(b, h, w, c):
        in_h = h * stride_h + dilation_h * red_h
        in_w = w * stride_w + dilation_w * red_w
        return te.sum(A[b, in_h, in_w, c].astype("float32"), axis=[red_h, red_w])

    pooled_sum = te.compute(out_shape, window_sum, name="sum")

    def window_avg(b, h, w, c):
        return (pooled_sum[b, h, w, c] * inv_area).astype(A.dtype)

    return te.compute(out_shape, window_avg, name="avg")


def STIR_schedule_nhwc_8h2w32c2w(outs, ins, output_layout: str, input_layout: str):
    """Create the TIR schedule for input and output layout nhwc-8h2w32c2w.

    A prim func is created from ``[ins, outs]``; the buffer read by the "sum"
    block and the buffer written by the "avg" block are re-mapped with the
    index maps selected by ``input_layout`` and ``output_layout``.

    Returns
    -------
    The resulting ``tir.Schedule``.
    """
    sch = tir.Schedule(te.create_prim_func([ins, outs]))
    sum_block = sch.get_block("sum")
    avg_block = sch.get_block("avg")

    # Both index maps are resolved before the schedule is touched, so an
    # unknown layout string fails up front.
    read_index_map = get_layout_transform_fn(input_layout)
    write_index_map = get_layout_transform_fn(output_layout)
    # NOTE: a reviewer pointed out that transform_layout also has a by-name
    # form (block=..., buffer=..., index_map=...). The author reported that it
    # could not be made to work here because of the intermediate "sum" compute.
    sch.transform_layout(sum_block, ("read", 0), read_index_map)
    sch.transform_layout(avg_block, ("write", 0), write_index_map)

    # Schedule "avg": split h by 8, w by 4 and then 2, and c by 32, reorder so
    # the inner pieces come last, and vectorize the fused innermost (c, w) pair.
    loop_n, loop_h, loop_w, loop_c = sch.get_loops(avg_block)
    h_outer, h_inner = sch.split(loop_h, [None, 8])
    w_outer, w_inner = sch.split(loop_w, [None, 4])
    w_inner_o, w_inner_i = sch.split(w_inner, [None, 2])
    c_outer, c_inner = sch.split(loop_c, [None, 32])
    sch.reorder(loop_n, h_outer, w_outer, c_outer, h_inner, w_inner_o, c_inner, w_inner_i)
    sch.vectorize(sch.fuse(c_inner, w_inner_i))

    # Schedule "sum": compute it under the w_inner_o loop of "avg", move its
    # last two loops in front of the two that precede them, then fuse that pair.
    sch.compute_at(sum_block, w_inner_o)
    sum_loops = sch.get_loops(sum_block)
    sch.reorder(sum_loops[-2], sum_loops[-1], sum_loops[-4], sum_loops[-3])
    sch.fuse(sum_loops[-4], sum_loops[-3])
    # The fused loop is deliberately not vectorized: the original author
    # recorded that vectorizing it "doesn't work".
    return sch


def STIR_schedule_n11c_1024c(outs, ins, output_layout: str, input_layout: str):
    """Create the TIR schedule for output layout n11c-1024c, input layout nhwc-8h2w32c2w.

    A prim func is created from ``[ins, outs]``; the buffer read by the "sum"
    block and the buffer written by the "avg" block are re-mapped with the
    index maps selected by ``input_layout`` and ``output_layout``.

    Returns
    -------
    The resulting ``tir.Schedule``.
    """
    sch = tir.Schedule(te.create_prim_func([ins, outs]))
    sum_block = sch.get_block("sum")
    avg_block = sch.get_block("avg")

    # Both index maps are resolved before the schedule is touched, so an
    # unknown layout string fails up front.
    read_index_map = get_layout_transform_fn(input_layout)
    write_index_map = get_layout_transform_fn(output_layout)
    sch.transform_layout(sum_block, ("read", 0), read_index_map)
    sch.transform_layout(avg_block, ("write", 0), write_index_map)

    # Schedule "avg": split c into chunks of 1024, each chunk into pieces of
    # 64, and vectorize the innermost piece.
    _loop_n, _loop_h, _loop_w, loop_c = sch.get_loops(avg_block)
    _c_outer, c_chunk = sch.split(loop_c, [None, 1024])
    c_chunk_o, c_chunk_i = sch.split(c_chunk, [None, 64])
    sch.vectorize(c_chunk_i)

    # Schedule "sum": compute it under the c_chunk_o loop of "avg" and move its
    # last two loops in front of the one that precedes them.
    sch.compute_at(sum_block, c_chunk_o)
    sum_loops = sch.get_loops(sum_block)
    sch.reorder(sum_loops[-2], sum_loops[-1], sum_loops[-3])
    # sum_loops[-3] is deliberately not vectorized: the original author
    # recorded that vectorizing it "doesn't work".
    return sch


def avg_pool2d_STIR_schedule(outs, ins, output_layout: str, input_layout: str):
    """Pick and run the STIR schedule that matches ``output_layout``.

    All four arguments are forwarded unchanged to the selected schedule
    function, whose result is returned.

    Raises
    ------
    RuntimeError
        If ``output_layout`` is neither "nhwc-8h2w32c2w-2d" nor
        "n11c-1024c-2d".
    """
    if output_layout == "nhwc-8h2w32c2w-2d":
        schedule_fn = STIR_schedule_nhwc_8h2w32c2w
    elif output_layout == "n11c-1024c-2d":
        schedule_fn = STIR_schedule_n11c_1024c
    else:
        raise RuntimeError(f"Unexpected layout '{output_layout}'")
    return schedule_fn(outs, ins, output_layout, input_layout)
52 changes: 52 additions & 0 deletions python/tvm/topi/hexagon/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name
"""Common hexagon specific utilities"""
from tvm import te


def n11c_1024c_2d(n, h, w, c):
    """Return the index map for the n11c-1024c 2d layout.

    ``c`` is split into a 1024-wide block index and the offset inside that
    block, with ``te.AXIS_SEPARATOR`` placed between the two.
    """
    c_block = c // 1024
    c_offset = c % 1024
    return [n, h, w, c_block, te.AXIS_SEPARATOR, c_offset]


def n11c_1024c_1d(n, h, w, c):
    """Return the index map for the n11c-1024c 1d layout.

    ``c`` is split into a 1024-wide block index and the offset inside that
    block; ``n``, ``h`` and ``w`` pass through unchanged.
    """
    c_block = c // 1024
    c_offset = c % 1024
    return [n, h, w, c_block, c_offset]


def nhwc_8h2w32c2w_2d(n, h, w, c):
    """Return the index map for the nhwc-8h2w32c2w 2d layout.

    The block indices (h by 8, w by 4, c by 32) come first, followed by
    ``te.AXIS_SEPARATOR`` and then the offsets inside the block.
    """
    block_indices = [n, h // 8, w // 4, c // 32]
    in_block_offsets = [h % 8, (w % 4) // 2, c % 32, w % 2]
    return block_indices + [te.AXIS_SEPARATOR] + in_block_offsets


def nhwc_8h2w32c2w_1d(n, h, w, c):
    """Return the index map for the nhwc-8h2w32c2w 1d layout.

    The block indices (h by 8, w by 4, c by 32) come first, followed directly
    by the offsets inside the block.
    """
    block_indices = [n, h // 8, w // 4, c // 32]
    in_block_offsets = [h % 8, (w % 4) // 2, c % 32, w % 2]
    return block_indices + in_block_offsets


def get_layout_transform_fn(layout):
    """Return the index-map function registered for the ``layout`` string.

    Raises
    ------
    RuntimeError
        If ``layout`` is not one of the four known layout strings.
    """
    if layout == "nhwc-8h2w32c2w-2d":
        index_map = nhwc_8h2w32c2w_2d
    elif layout == "nhwc-8h2w32c2w-1d":
        index_map = nhwc_8h2w32c2w_1d
    elif layout == "n11c-1024c-2d":
        index_map = n11c_1024c_2d
    elif layout == "n11c-1024c-1d":
        index_map = n11c_1024c_1d
    else:
        raise RuntimeError(f"Unexpected layout '{layout}'")
    return index_map
23 changes: 21 additions & 2 deletions tests/python/contrib/test_hexagon/infrastructure.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=invalid-name

""" Hexagon testing infrastructure """

Expand Down Expand Up @@ -47,8 +48,7 @@ def allocate_hexagon_array(
numpy.prod(tensor_shape[dim_i:dim_f])
for dim_i, dim_f in zip(boundaries[:-1], boundaries[1:])
]

arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev)
arr = tvm.nd.empty(physical_shape, dtype=dtype, device=dev, mem_scope=mem_scope)

if data is not None:
arr.copyfrom(data.reshape(physical_shape))
Expand Down Expand Up @@ -228,3 +228,22 @@ def compute(n, ho, wo, ko, hi, wi, ki):
)

return output_shape, compute


def transform_numpy(arr_np, current_layout: str, new_layout: str):
    """Reshape and transpose a numpy array according to the specified layout.

    Parameters
    ----------
    arr_np
        Array to convert. For every target other than "nhwc" its shape is
        unpacked as ``(n, h, w, c)``.
    current_layout
        Layout of ``arr_np``; only "nhwc" is supported.
    new_layout
        Target layout: "nhwc" (array returned as is), "nhwc-8h2w32c2w-2d" /
        "-1d", or "n11c-1024c-2d" / "-1d".

    Returns
    -------
    The array rearranged into the blocked target layout.

    Raises
    ------
    RuntimeError
        If ``current_layout`` or ``new_layout`` is not supported.
    AssertionError
        If an n11c-1024c layout is requested and ``h`` or ``w`` is not 1.
    """
    if current_layout != "nhwc":
        raise RuntimeError(f"Unexpected current_layout '{current_layout}'")

    if new_layout == "nhwc":
        return arr_np

    if new_layout in ("nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-1d"):
        n, h, w, c = arr_np.shape
        # Split h -> (h//8, 8), w -> (w//4, 2, 2), c -> (c//32, 32), then move
        # the block indices to the front and order the remaining axes as
        # 8h, 2w, 32c, 2w.
        blocked = arr_np.reshape([n, h // 8, 8, w // 4, 2, 2, c // 32, 32])
        return blocked.transpose(0, 1, 3, 6, 2, 4, 7, 5)

    if new_layout in ("n11c-1024c-2d", "n11c-1024c-1d"):
        n, h, w, c = arr_np.shape
        # Raised explicitly instead of using `assert`, so the check is not
        # stripped when Python runs with -O. The exception type and message
        # are unchanged.
        if h != 1 or w != 1:
            raise AssertionError("The size of h and w must be 1")
        return arr_np.reshape([n, 1, 1, c // 1024, 1024])

    raise RuntimeError(f"Unexpected new_layout '{new_layout}'")
Loading