Skip to content

Commit

Permalink
AArch64 base algorithm refactoring in LLVM (apache#6907)
Browse files Browse the repository at this point in the history
* AArch64 base algorithm refactoring in LLVM

- I refactored the assembly in arm_cpu/tensor_intrin.py to use LLVM+TIR
- Removed the `interleave` boolean parameter in the intrinsic to switch
between two different interleaving modes. LLVM will now take care of
interleaving the instructions
- Applied the changes accordingly to conv2d_gemm.py to call the right
intrinsic

Note: I found LLVM very sensitive to the choice of the `-mcpu`.
So, in order to preserve performance, it is important to specify the
right `-mcpu` when creating the LLVM target

* Fix linting

* Fix linting -2

* Fixing comments

* Address review comments

* Fix spaces around ':' in docstrings
  • Loading branch information
Giuseppe Rossini authored and Trevor Morris committed Dec 4, 2020
1 parent b302b76 commit c9a3efb
Show file tree
Hide file tree
Showing 2 changed files with 348 additions and 413 deletions.
15 changes: 3 additions & 12 deletions python/tvm/topi/arm_cpu/conv2d_gemm.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
from ..utils import get_const_tuple, get_const_int
from ..nn.utils import get_pad_tuple
from .tensor_intrin import (
gemm_quantized,
gemm_quantized_impl,
gemm_4x4_int8_int8_int32,
gemm_acc_4x4_int8_int8_int32,
gemm_acc_nx16_int8_int8_int32,
gemm_acc_2x2_int8_int8_int32,
Expand All @@ -51,11 +50,8 @@ def configure_knobs(cfg, M, K):

if not is_dotprod_available():
cfg.define_knob("gemm_quantized_unroll", [True, False])
cfg.define_knob("gemm_quantized_interleave", [True, False])

if cfg.is_fallback:
cfg["gemm_quantized_unroll"] = OtherOptionEntity(False)
cfg["gemm_quantized_interleave"] = OtherOptionEntity(True)


# Compute function
Expand Down Expand Up @@ -361,14 +357,9 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, final_out):
elif is_aarch64_arm():
s[C_interleaved].reorder(yi, xi)
K = A_interleaved_input.shape[2]
assert in_type in ["int8", "uint8"], "Only int8 and uint8 gemm are supported"
unroll = cfg["gemm_quantized_unroll"].val
interleave = cfg["gemm_quantized_interleave"].val
gemm = gemm_quantized(M, N, K, unroll, interleave, in_type, out_type)
s[C_interleaved].pragma(
b_outer_gemm_fused,
"import_llvm",
gemm_quantized_impl(M, N, K, unroll, interleave, in_type),
)
gemm = gemm_4x4_int8_int8_int32(M, N, K, unroll, in_type)
s[C_interleaved].tensorize(yi, gemm)

# Output transform
Expand Down
Loading

0 comments on commit c9a3efb

Please sign in to comment.