From 1d545d0d8b65dce9177fa6e54c60dbef070b7bda Mon Sep 17 00:00:00 2001 From: Grant Watson Date: Thu, 9 Dec 2021 13:47:24 +0000 Subject: [PATCH] Tutorial for running TVM on Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU (#9307) * Tutorial for running TVM on Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU Change-Id: If1e1134b56639021036862e8ea65a8e9d33dceb7 * Tutorial for running TVM on Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU - Moved tutorials/micro/cortex_m_ethosu.py to gallery/how_to/work_with_microtvm/micro_ethosu.py Change-Id: Ib554df6649b7313d4414187d5334ec5b03f35f33 * [micronpu] Update and cleanup tutorials. - Moved tutorials/micro/cortex_m_ethosu.py to gallery/how_to/work_with_microtvm/micro_ethosu.py - Replaced full linker script document with a link to the linker script on github Change-Id: Ic77648a4fc3dd76161d689774d21a3347a577b90 * [micronpu] Update and cleanup tutorial - Replace Makefile code with a link to Makefile on github - Replace header files with a link to header files on github - Update demo.c with changes introduced by Device API - Update tvmc command line arguments Change-Id: If6a254b368550c0f3effb8d1cb15f062279964e2 --- docs/conf.py | 6 + .../how_to/work_with_microtvm/micro_ethosu.py | 536 ++++++++++++++++++ 2 files changed, 542 insertions(+) create mode 100644 gallery/how_to/work_with_microtvm/micro_ethosu.py diff --git a/docs/conf.py b/docs/conf.py index 893d89c26156..e74df6cf1e0e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -310,6 +310,12 @@ def git_describe_version(original_version): "use_pass_instrument.py", "bring_your_own_datatypes.py", ], + "micro": [ + "micro_autotune.py", + "micro_reference_vm.py", + "micro_tflite.py", + "micro_ethosu.py", + ], } diff --git a/gallery/how_to/work_with_microtvm/micro_ethosu.py b/gallery/how_to/work_with_microtvm/micro_ethosu.py new file mode 100644 index 000000000000..0abd14d933eb --- /dev/null +++ b/gallery/how_to/work_with_microtvm/micro_ethosu.py @@ -0,0 +1,536 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Running TVM on bare metal Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU +======================================================================== +**Author**: +`Grant Watson `_ + +This section contains an example of how to use TVM to run a model +on an Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU, using bare metal. +The Cortex(R)-M55 is a small, low-power CPU designed for use in embedded +devices. The Ethos(TM)-U55 is a microNPU, specifically designed to accelerate +ML inference in resource-constrained embedded devices. + +In order to run the demo application without having access to a Cortex(R)-M55 +and Ethos(TM)-U55 development board, we will be running our sample application +on a Fixed Virtual Platform (FVP). 
The FVP, based on Arm(R) Corstone(TM)-300 software, models a hardware system
containing a Cortex(R)-M55 and an Ethos(TM)-U55. It provides a programmer's view
that is suitable for software development.

In this tutorial, we will be compiling a MobileNet v1 model and instructing
TVM to offload operators to the Ethos(TM)-U55 where possible.
"""

################################################################################
# Obtaining TVM
# -------------
#
# To obtain TVM for your platform, please visit https://tlcpack.ai/ and follow the
# instructions. Once TVM has been installed correctly, you should have access to
# ``tvmc`` from the command line.
#
# Typing ``tvmc`` on the command line should display the following:
#
# .. code-block:: text
#
#     usage: tvmc [-h] [-v] [--version] {tune,compile,run} ...
#
#     TVM compiler driver
#
#     optional arguments:
#       -h, --help          show this help message and exit
#       -v, --verbose       increase verbosity
#       --version           print the version and exit
#
#     commands:
#       {tune,compile,run}
#         tune              auto-tune a model
#         compile           compile a model.
#         run               run a compiled module
#
#     TVMC - TVM driver command-line interface
#

################################################################################
# Installing additional python dependencies
# -----------------------------------------
#
# In order to run the demo, you will need some additional Python packages.
# These can be installed by using the requirements.txt file below:
#
# .. code-block:: text
#    :caption: requirements.txt
#    :name: requirements.txt
#
#     attrs==21.2.0
#     cloudpickle==2.0.0
#     decorator==5.1.0
#     ethos-u-vela==2.1.1
#     flatbuffers==1.12
#     lxml==4.6.3
#     nose==1.3.7
#     numpy==1.19.5
#     Pillow==8.3.2
#     psutil==5.8.0
#     scipy==1.5.4
#     synr==0.4
#     tflite==2.4.0
#     tornado==6.1
#
# These packages can be installed by running the following from the command line:
#
# .. code-block:: bash
#
#   pip install -r requirements.txt
#

################################################################################
# Obtaining the Model
# -------------------
#
# For this tutorial, we will be working with MobileNet v1.
# MobileNet v1 is a convolutional neural network designed to classify images
# and optimized for edge devices. The model we will be using has been
# pre-trained to classify images into one of 1001 different categories.
# The network has an input image size of 224x224, so any input images will need
# to be resized to those dimensions before being used.
#
# For this tutorial we will be using the model in TFLite format.
#
# .. code-block:: bash
#
#   mkdir -p ./build
#   cd build
#   wget https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz
#   gunzip mobilenet_v1_1.0_224_quant.tgz
#   tar xvf mobilenet_v1_1.0_224_quant.tar
#

################################################################################
# Compiling the model for Arm(R) Cortex(R)-M55 CPU and Ethos(TM)-U55 NPU
# ----------------------------------------------------------------------
#
# Once we've downloaded the MobileNet v1 model, the next step is to compile it.
# To accomplish that, we are going to use ``tvmc compile``. The output we get from
# the compilation process is a TAR package of the model compiled to the Model
# Library Format (MLF) for our target platform. We will be able to run that model
# on our target device using the TVM runtime.
#
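# Before compiling, it can be useful to confirm the model's input tensor name,
# shape and data type, since these determine how the image is pre-processed later
# in this tutorial and the field names of the generated C interface. The sketch
# below uses the ``tflite`` package from requirements.txt; the accessors follow
# the standard flatbuffers-generated Python API, so adjust them if your version
# of the package differs:
#
# .. code-block:: python
#
#    import tflite
#
#    # Read the downloaded model and wrap it in the generated flatbuffers API
#    buf = open("./mobilenet_v1_1.0_224_quant.tflite", "rb").read()
#    model = tflite.Model.GetRootAsModel(buf, 0)
#    subgraph = model.Subgraphs(0)
#
#    # Look up the first (and only) input tensor of the main subgraph
#    input_tensor = subgraph.Tensors(subgraph.Inputs(0))
#    shape = [input_tensor.Shape(i) for i in range(input_tensor.ShapeLength())]
#    print("Input name :", input_tensor.Name().decode("utf-8"))
#    print("Input shape:", shape)
#
# For this model you should see a single input named ``input`` with shape
# ``[1, 224, 224, 3]``. With that confirmed, we compile the model using ``tvmc``:
#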
# .. code-block:: bash
#
#   tvmc compile --target="ethos-u -accelerator_config=ethos-u55-256, c" \
#        --target-c-mcpu=cortex-m55 \
#        --runtime=crt \
#        --executor=aot \
#        --executor-aot-interface-api=c \
#        --executor-aot-unpacked-api=1 \
#        --pass-config tir.disable_vectorize=1 \
#        ./mobilenet_v1_1.0_224_quant.tflite \
#        --output-format=mlf
#

################################################################################
# .. note:: Explanation of tvmc compile arguments:
#
#   * ``--target="ethos-u -accelerator_config=ethos-u55-256, c"`` : Offload operators
#     to the Ethos(TM)-U55 NPU where possible and fall back to generated C code on
#     the Cortex(R)-M where an operator is not supported on the NPU.
#
#   * ``--target-c-mcpu=cortex-m55`` : Cross-compile for the Cortex(R)-M55.
#
#   * ``--runtime=crt`` : Generate glue code to allow operators to work with the C runtime.
#
#   * ``--executor=aot`` : Use Ahead Of Time compilation instead of the Graph Executor.
#
#   * ``--executor-aot-interface-api=c`` : Generate a C-style interface with structures
#     designed for integrating into C apps at the boundary.
#
#   * ``--executor-aot-unpacked-api=1`` : Use the unpacked API internally.
#
#   * ``--pass-config tir.disable_vectorize=1`` : Disable vectorization, since there are
#     no standard vectorized types in C.
#
#   * ``./mobilenet_v1_1.0_224_quant.tflite`` : The TFLite model that is being compiled.
#
#   * ``--output-format=mlf`` : Output should be generated in the Model Library Format.
#

################################################################################
# Extracting the generated code into the current directory
# ---------------------------------------------------------
#
# .. code-block:: bash
#
#   tar xvf module.tar
#

################################################################################
# Getting ImageNet labels
# -----------------------
#
# When running MobileNet v1 on an image, the result is an index in the range 0 to
# 1000. In order to make our application a little more user friendly, instead of
# just displaying the category index, we will display the associated label. We
# will download these image labels into a text file now and use a Python script
# to include them in our C application later.
#
# .. code-block:: bash
#
#   curl -sS https://raw.githubusercontent.com/tensorflow/tensorflow/master/tensorflow/lite/java/demo/app/src/main/assets/labels_mobilenet_quant_v1_224.txt \
#       -o ./labels_mobilenet_quant_v1_224.txt
#
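# As a quick illustration of how an inference result maps onto this file, the
# snippet below looks up the label for a hypothetical classification index (the
# index used here is an example value only, not a guaranteed result):
#
# .. code-block:: python
#
#    # Load the 1001 labels, one per line, in the order the model uses
#    with open("./labels_mobilenet_quant_v1_224.txt") as f:
#        labels = f.read().splitlines()
#
#    classification_index = 282  # example only; use the index reported by inference
#    print(f"Index {classification_index} corresponds to '{labels[classification_index]}'")
#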
################################################################################
# Getting the input image
# -----------------------
#
# As input for this tutorial, we will use the image of a cat, but you can
# substitute an image of your choosing.
#
# .. image:: https://s3.amazonaws.com/model-server/inputs/kitten.jpg
#    :height: 224px
#    :width: 224px
#    :align: center
#
# We download the image into the build directory and, in the next step, we will
# use a Python script to convert the image into an array of bytes in a C header file.
#
# .. code-block:: bash
#
#   curl -sS https://s3.amazonaws.com/model-server/inputs/kitten.jpg -o ./kitten.jpg
#

################################################################################
# Pre-processing the image
# ------------------------
#
# The following script will create two C header files in the ``./include`` directory:
#
# * ``inputs.h`` - The image supplied as an argument to the script will be converted
#   to an array of integers for input to our MobileNet v1 model.
#
# * ``outputs.h`` - An integer array of zeroes will reserve 1001 integer values
#   for the output of inference.
#
# .. code-block:: python
#    :caption: convert_image.py
#    :name: convert_image.py
#
#     #!python ./convert_image.py
#     import os
#     import pathlib
#     import re
#     import sys
#     from PIL import Image
#     import numpy as np
#
#
#     def create_header_file(name, section, tensor_name, tensor_data, output_path):
#         """
#         This function generates a header file containing the data from the numpy array provided.
#         """
#         file_path = pathlib.Path(f"{output_path}/" + name).resolve()
#         # Create header file with npy_data as a C array
#         raw_path = file_path.with_suffix(".h").resolve()
#         with open(raw_path, "w") as header_file:
#             header_file.write(
#                 "#include <stddef.h>\n"
#                 + f"const size_t {tensor_name}_len = {tensor_data.size};\n"
#                 + f'uint8_t {tensor_name}[] __attribute__((section("{section}"), aligned(16))) = "'
#             )
#             data_hexstr = tensor_data.tobytes().hex()
#             for i in range(0, len(data_hexstr), 2):
#                 header_file.write(f"\\x{data_hexstr[i:i+2]}")
#             header_file.write('";\n\n')
#
#
#     def create_headers(image_name):
#         """
#         This function generates C header files for the input and output arrays required to run inferences
#         """
#         img_path = os.path.join("./", f"{image_name}")
#
#         # Resize image to 224x224
#         resized_image = Image.open(img_path).resize((224, 224))
#         img_data = np.asarray(resized_image).astype("float32")
#
#         # Convert input to NCHW
#         img_data = np.transpose(img_data, (2, 0, 1))
#
#         # Create input header file
#         input_data = img_data.astype(np.uint8)
#         create_header_file("inputs", "ethosu_scratch", "input", input_data, "./include")
#         # Create output header file
#         output_data = np.zeros([1001], np.uint8)
#         create_header_file(
#             "outputs",
#             "output_data_sec",
#             "output",
#             output_data,
#             "./include",
#         )
#
#
#     if __name__ == "__main__":
#         create_headers(sys.argv[1])
#
# Run the script from the command line:
#
# .. code-block:: bash
#
#   python convert_image.py ./kitten.jpg
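# If you want to check that the conversion worked, the generated ``inputs.h`` should
# declare an ``input_len`` of 150528 bytes (224 x 224 pixels x 3 colour channels).
# A small, purely illustrative check from Python:
#
# .. code-block:: python
#
#    import pathlib
#    import re
#
#    header = pathlib.Path("./include/inputs.h").read_text()
#    input_len = int(re.search(r"input_len = (\d+)", header).group(1))
#    assert input_len == 224 * 224 * 3, "unexpected input size"
#    print(f"inputs.h declares {input_len} bytes of input data")
#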
################################################################################
# Pre-processing the labels
# -------------------------
#
# The following script will create a ``labels.h`` header file in the ``./include``
# directory. The labels file that we downloaded previously will be turned
# into an array of strings. This array will be used to display the label that
# our image has been classified as.
#
# .. code-block:: python
#    :caption: convert_labels.py
#    :name: convert_labels.py
#
#     #!python ./convert_labels.py
#     import os
#     import pathlib
#     import sys
#
#
#     def create_labels_header(labels_file, section, output_path):
#         """
#         This function generates a header file containing the ImageNet labels as an array of strings
#         """
#         labels_path = pathlib.Path(labels_file).resolve()
#         file_path = pathlib.Path(f"{output_path}/labels.h").resolve()
#
#         with open(labels_path) as f:
#             labels = f.readlines()
#
#         with open(file_path, "w") as header_file:
#             header_file.write(f'char* labels[] __attribute__((section("{section}"), aligned(16))) = {{')
#
#             for _, label in enumerate(labels):
#                 header_file.write(f'"{label.rstrip()}",')
#
#             header_file.write("};\n")
#
#
#     if __name__ == "__main__":
#         create_labels_header(sys.argv[1], "ethosu_scratch", "./include")
#
# Run the script from the command line, passing it the labels file we downloaded earlier:
#
# .. code-block:: bash
#
#   python convert_labels.py ./labels_mobilenet_quant_v1_224.txt

################################################################################
# Writing the demo application
# ----------------------------
#
# The following C application will run a single inference of the MobileNet v1
# model on the image that we downloaded and converted to an array of integers
# previously. Since the model was compiled with a target of "ethos-u ...",
# operators supported by the Ethos(TM)-U55 NPU will be offloaded for acceleration.
# Once the application is built and run, our test image should be correctly
# classified as a "tabby" and the result should be displayed on the console.
# This file should be placed in ``./src``.
#
# .. code-block:: c
#    :caption: demo.c
#    :name: demo.c
#
#     #include <stdio.h>
#     #include <stdint.h>
#
#     #include "ethosu_mod.h"
#     #include "uart.h"
#
#     // Header files generated by convert_image.py and convert_labels.py
#     #include "inputs.h"
#     #include "labels.h"
#     #include "outputs.h"
#
#     int abs(int v) { return v * ((v > 0) - (v < 0)); }
#
#     int main(int argc, char** argv) {
#       uart_init();
#       printf("Starting Demo\n");
#       EthosuInit();
#
#       printf("Allocating memory\n");
#       StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);
#
#       printf("Running inference\n");
#       struct tvmgen_default_outputs outputs = {
#           .output = output,
#       };
#       struct tvmgen_default_inputs inputs = {
#           .input = input,
#       };
#       struct ethosu_driver* driver = ethosu_reserve_driver();
#       struct tvmgen_default_devices devices = {
#           .ethos_u = driver,
#       };
#       tvmgen_default_run(&inputs, &outputs, &devices);
#       ethosu_release_driver(driver);
#
#       // Calculate index of max value
#       uint8_t max_value = 0;
#       int32_t max_index = -1;
#       for (unsigned int i = 0; i < output_len; ++i) {
#         if (output[i] > max_value) {
#           max_value = output[i];
#           max_index = i;
#         }
#       }
#       printf("The image has been classified as '%s'\n", labels[max_index]);
#
#       // The FVP will shut down when it receives "EXITTHESIM" on the UART
#       printf("EXITTHESIM\n");
#       while (1 == 1)
#         ;
#       return 0;
#     }
#
# In addition, you will need these header files from GitHub in your ``./include`` directory:
#
# `include files `_

################################################################################
# Creating the linker script
# --------------------------
#
# We need to create a linker script that will be used when we build our application
# in the following section. The linker script tells the linker where everything
# should be placed in memory. The corstone300.ld linker script should be
# placed in your working directory.
#
# An example linker script for the FVP can be found here:
# `corstone300.ld `_

################################################################################
# .. note::
#
#   The code generated by TVM will place the model weights and the Arm(R)
#   Ethos(TM)-U55 command stream in a section named ``ethosu_scratch``.
#   For a model the size of MobileNet v1, the weights and command stream will not
#   fit into the limited SRAM available. For this reason it's important that the
#   linker script places the ``ethosu_scratch`` section into DRAM (DDR).
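# To get a rough sense of the scale involved, you can check the size of the
# quantized model file; the weights and command stream that end up in
# ``ethosu_scratch`` will be of a similar order of magnitude. This check is
# purely illustrative:
#
# .. code-block:: python
#
#    import os
#
#    model_bytes = os.path.getsize("./mobilenet_v1_1.0_224_quant.tflite")
#    print(f"Model file size: {model_bytes / (1024 * 1024):.1f} MiB")
#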
################################################################################
# .. note::
#
#   Before building and running the application, you will need to update your
#   PATH environment variable to include the path to cmake 3.19.5 and the FVP.
#   For example, if you've installed these in ``/opt/arm``, then you would do
#   the following:
#
#   ``export PATH=/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH``
#

################################################################################
# Building the demo application using make
# ----------------------------------------
#
# We can now build the demo application using make. The Makefile should be placed
# in your working directory before running ``make`` on the command line.
#
# An example Makefile can be found here:
# `Makefile `_

################################################################################
# Running the demo application
# ----------------------------
#
# Finally, we can run our demo application on the Fixed Virtual Platform (FVP),
# by using the following command:
#
# .. code-block:: bash
#
#     FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \
#     -C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \
#     -C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \
#     -C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \
#     -C ethosu.extra_args="--fast" \
#     -C ethosu.num_macs=256 ./build/demo
#
# You should see the following output displayed in your console window:
#
# .. code-block:: text
#
#     telnetterminal0: Listening for serial connection on port 5000
#     telnetterminal1: Listening for serial connection on port 5001
#     telnetterminal2: Listening for serial connection on port 5002
#     telnetterminal5: Listening for serial connection on port 5003
#
#     Ethos-U rev dedfa618 --- Jan 12 2021 23:03:55
#     (C) COPYRIGHT 2019-2021 Arm Limited
#     ALL RIGHTS RESERVED
#
#     Starting Demo
#     ethosu_init. base_address=0x48102000, fast_memory=0x0, fast_memory_size=0, secure=1, privileged=1
#     ethosu_register_driver: New NPU driver at address 0x20000de8 is registered.
#     CMD=0x00000000
#     Soft reset NPU
#     Allocating memory
#     Running inference
#     ethosu_find_and_reserve_driver - Driver 0x20000de8 reserved.
#     ethosu_invoke
#     CMD=0x00000004
#     QCONFIG=0x00000002
#     REGIONCFG0=0x00000003
#     REGIONCFG1=0x00000003
#     REGIONCFG2=0x00000013
#     REGIONCFG3=0x00000053
#     REGIONCFG4=0x00000153
#     REGIONCFG5=0x00000553
#     REGIONCFG6=0x00001553
#     REGIONCFG7=0x00005553
#     AXI_LIMIT0=0x0f1f0000
#     AXI_LIMIT1=0x0f1f0000
#     AXI_LIMIT2=0x0f1f0000
#     AXI_LIMIT3=0x0f1f0000
#     ethosu_invoke OPTIMIZER_CONFIG
#     handle_optimizer_config:
#     Optimizer release nbr: 0 patch: 1
#     Optimizer config cmd_stream_version: 0 macs_per_cc: 8 shram_size: 48 custom_dma: 0
#     Optimizer config Ethos-U version: 1.0.6
#     Ethos-U config cmd_stream_version: 0 macs_per_cc: 8 shram_size: 48 custom_dma: 0
#     Ethos-U version: 1.0.6
#     ethosu_invoke NOP
#     ethosu_invoke NOP
#     ethosu_invoke NOP
#     ethosu_invoke COMMAND_STREAM
#     handle_command_stream: cmd_stream=0x61025be0, cms_length 1181
#     QBASE=0x0000000061025be0, QSIZE=4724, base_pointer_offset=0x00000000
#     BASEP0=0x0000000061026e60
#     BASEP1=0x0000000060002f10
#     BASEP2=0x0000000060002f10
#     BASEP3=0x0000000061000fb0
#     BASEP4=0x0000000060000fb0
#     CMD=0x000Interrupt. status=0xffff0022, qread=4724
#     CMD=0x00000006
#     00006
#     CMD=0x0000000c
#     ethosu_release_driver - Driver 0x20000de8 released
#     The image has been classified as 'tabby'
#     EXITTHESIM
#     Info: /OSCI/SystemC: Simulation stopped by user.
#
# You should see near the end of the output that the image has been correctly
# classified as 'tabby'.
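#
# As an optional sanity check, you can reproduce the classification on your host
# machine with the reference TFLite interpreter. Note that TensorFlow is not part
# of requirements.txt, so this sketch assumes you have it installed separately:
#
# .. code-block:: python
#
#    import numpy as np
#    from PIL import Image
#    import tensorflow as tf
#
#    # Load the quantized model with the reference TFLite interpreter
#    interpreter = tf.lite.Interpreter(model_path="./mobilenet_v1_1.0_224_quant.tflite")
#    interpreter.allocate_tensors()
#    input_details = interpreter.get_input_details()[0]
#    output_details = interpreter.get_output_details()[0]
#
#    # The model expects a 1x224x224x3 uint8 tensor
#    image = Image.open("./kitten.jpg").resize((224, 224))
#    input_data = np.expand_dims(np.asarray(image, dtype=np.uint8), axis=0)
#
#    interpreter.set_tensor(input_details["index"], input_data)
#    interpreter.invoke()
#    scores = interpreter.get_tensor(output_details["index"])[0]
#
#    labels = open("./labels_mobilenet_quant_v1_224.txt").read().splitlines()
#    print("Host-side prediction:", labels[int(np.argmax(scores))])
#
# The top-1 label reported here should match the 'tabby' result printed by the
# demo application on the FVP.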