From 463eda2cb5b9af1ad8dd18373600cd634896e66b Mon Sep 17 00:00:00 2001
From: Chris Sidebottom
Date: Mon, 24 May 2021 08:39:04 +0000
Subject: [PATCH] Introduce --interface-api={c,packed} parameter

This introduces generated structures which provide a documented and
stable user-friendly interface to a TVM-generated model, as can be seen
in the AOT demo application:

```
struct tvm_default_inputs inputs = {
    .input_1 = input_data,
};
struct tvm_default_outputs outputs = {
    .output = output_data,
};
int ret_val = tvm_default_run(&inputs, &outputs, NULL, NULL);
```

To facilitate this, some other changes are included:

* Removed the dependency on `aot_executor.{c,h}` in tests, pending the
  discussion in the interface RFC as to whether we keep them.
* Moved creation of test DLTensors into the AOT test utils; in future
  this can be replaced by loading them via the Python API or similar.
* Introduced `parametrize_aot_options`, which can be used to test
  permutations of AOT options that work together - for now this filters
  out the combination of the C interface and packed operators.
* Updated the demo application to generate the header for demonstration
  purposes; we should consider porting the demo application to Model
  Library Format and using the toolchain in the Zephyr app via CMake
  instead.

This patch builds upon the improvements @giuseros made to AOT testing
and will greatly benefit from the name mangling in #8014.
---
 apps/microtvm/zephyr/aot_demo/src/main.c      |  11 +-
 python/tvm/micro/interface_api.py             |  62 ++++
 python/tvm/micro/model_library_format.py      |  45 ++-
 src/relay/backend/aot_executor_codegen.cc     |   6 +-
 src/runtime/meta_data.h                       |   9 +-
 src/target/source/source_module.cc            |  70 +++--
 src/target/target_kind.cc                     |   2 +
 tests/micro/zephyr/test_zephyr_aot.py         |   6 +-
 tests/python/relay/aot/aot_test.mk            |   3 +-
 tests/python/relay/aot/aot_test_utils.py      | 256 ++++++++++++-----
 tests/python/relay/aot/test_crt_aot.py        | 264 +++++++++++------
 .../test_micro_model_library_format.py        |  39 +--
 12 files changed, 563 insertions(+), 210 deletions(-)
 create mode 100644 python/tvm/micro/interface_api.py

diff --git a/apps/microtvm/zephyr/aot_demo/src/main.c b/apps/microtvm/zephyr/aot_demo/src/main.c
index b92366a7098b9..7f7fd2d611089 100644
--- a/apps/microtvm/zephyr/aot_demo/src/main.c
+++ b/apps/microtvm/zephyr/aot_demo/src/main.c
@@ -32,6 +32,7 @@
 
 #include "input_data.h"
 #include "output_data.h"
+#include "tvm_default.h"
 #include "zephyr_uart.h"
 
 #ifdef CONFIG_ARCH_POSIX
@@ -194,18 +195,18 @@ void main(void) {
   }
   TVMLogf("Zephyr AOT Runtime\n");
 
-  void* inputs[1] = {
-      input_data,
+  struct tvm_default_inputs inputs = {
+      .input_1 = input_data,
   };
-  void* outputs[1] = {
-      output_data,
+  struct tvm_default_outputs outputs = {
+      .output = output_data,
   };
 
   StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);
 
   double elapsed_time = 0;
   TVMPlatformTimerStart();
-  int ret_val = tvm_runtime_run(&network, inputs, outputs);
+  int ret_val = tvm_default_run(&inputs, &outputs, NULL, NULL);
   TVMPlatformTimerStop(&elapsed_time);
 
   if (ret_val != 0) {
diff --git a/python/tvm/micro/interface_api.py b/python/tvm/micro/interface_api.py
new file mode 100644
index 0000000000000..680fc1170582e
--- /dev/null
+++ b/python/tvm/micro/interface_api.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os + + +def _emit_brief(header_file, model_name, description): + header_file.write("/*!\n") + header_file.write(f" * \\brief TVM {model_name} model {description} \n") + header_file.write(" */\n") + + +def generate_c_interface_header(model_name, inputs, outputs, output_path): + metadata_header = os.path.join(output_path, f"tvm_{model_name}.h") + with open(metadata_header, "w") as header_file: + _emit_brief(header_file, model_name, "input tensors") + header_file.write(f"struct tvm_{model_name}_inputs {{\n") + for input_name in inputs: + header_file.write(f"\tvoid* {input_name};\n") + header_file.write("};\n\n") + + _emit_brief(header_file, model_name, "output tensors") + header_file.write(f"struct tvm_{model_name}_outputs {{\n") + for output_name in outputs: + header_file.write(f"\tvoid* {output_name};\n") + header_file.write("};\n\n") + + _emit_brief(header_file, model_name, "memory blocks") + header_file.write(f"struct tvm_{model_name}_memory {{\n") + header_file.write("};\n\n") + + _emit_brief(header_file, model_name, "device configurations") + header_file.write(f"struct tvm_{model_name}_devices {{\n") + header_file.write("};\n\n") + + header_file.write("/*!\n") + header_file.write(f" * \\brief TVM {model_name} model run function \n") + header_file.write(f" * \\param inputs Input tensors for the model \n") + header_file.write(f" * \\param outputs Output tensors for the model \n") + header_file.write(f" * \\param memory Memory blocks for the model to use \n") + header_file.write(f" * \\param devices Devices for the model to use \n") + header_file.write(" */\n") + header_file.write(f"int tvm_{model_name}_run(\n") + header_file.write(f"\tstruct tvm_{model_name}_inputs* inputs,\n") + header_file.write(f"\tstruct tvm_{model_name}_outputs* outputs,\n") + header_file.write(f"\tstruct tvm_{model_name}_memory* memory,\n") + header_file.write(f"\tstruct tvm_{model_name}_devices* devices\n") + header_file.write(");\n") diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index 1cc3adf9ae07e..5c2fbbb25e81d 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -23,9 +23,11 @@ import re import tarfile +from .interface_api import generate_c_interface_header from ..contrib import utils from ..relay.backend import executor_factory from ..relay import param_dict +from tvm.ir.type import TupleType # This should be kept identical to runtime::symbol::tvm_module_main MAIN_FUNC_NAME_STR = "__tvm_main__" @@ -46,7 +48,6 @@ def _populate_codegen_dir(mod, codegen_dir: str): Path to the codegen directory on disk. 
""" dso_modules = mod._collect_dso_modules() - dso_module_handles = [m.handle.value for m in dso_modules] non_dso_modules = mod._collect_from_import_tree(lambda m: m not in dso_modules) if non_dso_modules: raise UnsupportedInModelLibraryFormatError( @@ -203,6 +204,42 @@ def _build_function_memory_map(function_metadata): return ret +def _get_main_relay_func(mod: executor_factory.ExecutorFactoryModule): + main_func = mod.function_metadata[MAIN_FUNC_NAME_STR] + target = list(main_func.relay_primfuncs.keys())[0] + return main_func.relay_primfuncs[target] + + +def _convert_tuple_to_outputs(ret_type, offset=0): + outputs = [] + added_fields = len(ret_type.fields) + for output_index in range(added_fields): + next_output = offset + len(outputs) + if isinstance(ret_type.fields[output_index], TupleType): + outputs.extend(_convert_tuple_to_outputs(ret_type.fields[output_index], next_output)) + else: + outputs.append(f"output{next_output}") + return outputs + + +def _get_inputs_and_outputs_from_module(mod): + main_func = _get_main_relay_func(mod) + inputs = [argument.name_hint for argument in main_func.params] + + outputs = ["output"] + if isinstance(main_func.ret_type, TupleType): + outputs = _convert_tuple_to_outputs(main_func.ret_type) + + return inputs, outputs + + +def _should_generate_interface_header(mod): + for _, target in mod.target.items(): + if "interface-api" in target.attrs and target.attrs["interface-api"] == "c": + return True + return False + + def export_model_library_format(mod: executor_factory.ExecutorFactoryModule, file_name): """Export the build artifact in Model Library Format. @@ -242,6 +279,12 @@ def export_model_library_format(mod: executor_factory.ExecutorFactoryModule, fil os.mkdir(codegen_dir_path) _populate_codegen_dir(mod.lib, codegen_dir_path) + if _should_generate_interface_header(mod): + include_path = os.path.join(codegen_dir_path, "host/include") + os.mkdir(include_path) + inputs, outputs = _get_inputs_and_outputs_from_module(mod) + generate_c_interface_header(mod.libmod_name, inputs, outputs, include_path) + parameters_dir_path = tempdir.relpath("parameters") os.mkdir(parameters_dir_path) param_filename = os.path.join(parameters_dir_path, f"{mod.libmod_name}.params") diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index 66294d1dd0767..acd2373988a78 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -544,7 +544,7 @@ class AOTExecutorCodegen : public ExprVisitor { /*! \brief mod */ runtime::Module* mod_; /*! \brief list of input expressions (i.e., variable passed by the user) */ - std::vector input_vars_; + std::vector input_vars_; /*! \brief input and output variables belonging to the main function signature */ Array main_signature_; /*! 
@@ -652,8 +652,8 @@ class AOTExecutorCodegen : public ExprVisitor {
       ret.lowered_funcs.Set(target_host_str, IRModule(symbol_map));
     }
     ret.function_metadata = std::move(function_metadata_);
-    ret.metadata =
-        runtime::Metadata(input_vars_.size(), return_sid_.size(), runtime::kTvmExecutorAot);
+
+    ret.metadata = runtime::Metadata(input_vars_, return_sid_.size(), runtime::kTvmExecutorAot);
     return ret;
   }
 };
diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h
index 495b3f22e6adb..575d5c13db6af 100644
--- a/src/runtime/meta_data.h
+++ b/src/runtime/meta_data.h
@@ -26,6 +26,7 @@
 
 #include <dmlc/io.h>
 #include <dmlc/json.h>
+#include <tvm/relay/expr.h>
 #include <tvm/runtime/container.h>
 #include <tvm/runtime/module.h>
 #include <tvm/runtime/ndarray.h>
@@ -46,8 +47,8 @@ namespace runtime {
  */
 class MetadataNode : public Object {
  public:
-  /*! \brief number of inputs of the main function */
-  int num_inputs = 1;
+  /*! \brief input information for the main function */
+  Array<tvm::relay::Var> inputs;
   /*! \brief number of outputs of the main function */
   int num_outputs = 1;
   /*! \brief the executor to be used to run the model */
@@ -63,9 +64,9 @@ class MetadataNode : public Object {
  */
 class Metadata : public ObjectRef {
  public:
-  TVM_DLL Metadata(int num_inputs, int num_outputs, String executor) {
+  TVM_DLL Metadata(Array<tvm::relay::Var> inputs, int num_outputs, String executor) {
     auto n = make_object<MetadataNode>();
-    n->num_inputs = num_inputs;
+    n->inputs = inputs;
     n->num_outputs = num_outputs;
     n->executor = executor;
     data_ = std::move(n);
diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc
index 992df61980f82..c62842718aa3e 100644
--- a/src/target/source/source_module.cc
+++ b/src/target/source/source_module.cc
@@ -193,24 +193,24 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
   }
 
   void GenerateEntrypointForUnpackedAPI() {
-    code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix << "(";
-    int total_args = (metadata_->num_inputs + metadata_->num_outputs);
-    for (int i = 0; i < total_args; ++i) {
-      code_ << "arg" << i;
+    code_ << "TVM_DLL int " << ::tvm::runtime::symbol::tvm_run_func_prefix << "(";
+    unsigned int total_args = (metadata_->inputs.size() + metadata_->num_outputs);
+    for (unsigned int i = 0; i < total_args; ++i) {
+      code_ << "void* arg" << i;
       if (i + 1 != total_args) {
         code_ << ",";
       }
     }
     code_ << ");\n";
-    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "int " << ::tvm::runtime::symbol::tvm_module_main;
     code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
              "out_type_code, void* resource_handle) {\n";
     code_ << "return " << ::tvm::runtime::symbol::tvm_run_func_prefix << "(";
-    for (int i = 0; i < metadata_->num_inputs; ++i) {
+    for (unsigned int i = 0; i < metadata_->inputs.size(); ++i) {
       code_ << "((DLTensor*)(((TVMValue*)args)[" << i << "].v_handle))[0].data,";
     }
     for (int i = 0; i < metadata_->num_outputs; ++i) {
-      int j = metadata_->num_inputs + i;
+      int j = metadata_->inputs.size() + i;
       code_ << "((DLTensor*)(((TVMValue*)args)[" << j << "].v_handle))[0].data";
       if (i + 1 != metadata_->num_outputs) {
         code_ << ",";
@@ -221,10 +221,10 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode {
   }
 
   void GenerateEntrypointForPackedAPI() {
-    code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix;
+    code_ << "TVM_DLL int " << ::tvm::runtime::symbol::tvm_run_func_prefix;
     code_ << "(void* args, void* type_code, int num_args, void* out_value, void* "
              "out_type_code, void* resource_handle);\n";
-    code_ << "static int32_t " << ::tvm::runtime::symbol::tvm_module_main;
+    code_ << "int " << ::tvm::runtime::symbol::tvm_module_main;
<< "int " << ::tvm::runtime::symbol::tvm_module_main; code_ << "(void* args, void* type_code, int num_args, void* out_value, void* " "out_type_code, void* resource_handle) {\n"; code_ << "return " << ::tvm::runtime::symbol::tvm_run_func_prefix; @@ -232,22 +232,60 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode { code_ << "}\n"; } + void GenerateCInterfaceEntrypoint() { + code_ << "#include \n"; + code_ << "TVM_DLL int32_t " << ::tvm::runtime::symbol::tvm_run_func_prefix << "("; + unsigned int total_args = (metadata_->inputs.size() + metadata_->num_outputs); + for (unsigned int i = 0; i < total_args; ++i) { + code_ << "void* arg" << i; + if (i + 1 != total_args) { + code_ << ","; + } + } + code_ << ");\n"; + code_ << "int tvm_default_run("; + code_ << "struct tvm_default_inputs* inputs," + << "struct tvm_default_outputs* outputs," + << "struct tvm_default_memory* memory," + << "struct tvm_default_devices* devices" + << ") {"; + code_ << "return " << ::tvm::runtime::symbol::tvm_run_func_prefix << "("; + for (const auto& input : metadata_->inputs) { + code_ << "inputs->" << input->name_hint() << ","; + } + if (metadata_->num_outputs == 1) { + code_ << "outputs->output"; + } else { + for (int i = 0; i < metadata_->num_outputs; ++i) { + code_ << "outputs->output" << i; + if (i + 1 != metadata_->num_outputs) { + code_ << ","; + } + } + } + code_ << ");\n"; + code_ << "}\n"; + } + void GenerateAOTDescriptor() { + auto unpacked_api = target_->GetAttr("unpacked-api").value_or(Bool(false)); + auto interface_api = target_->GetAttr("interface-api").value_or(String("packed")); + code_ << "#include \"tvm/runtime/crt/internal/aot_executor/aot_executor.h\"\n"; code_ << "#include \"tvm/runtime/c_runtime_api.h\"\n"; code_ << "#ifdef __cplusplus\n"; code_ << "extern \"C\"\n"; code_ << "#endif\n"; - if (target_->GetAttr("unpacked-api").value_or(Bool(false))) { - GenerateEntrypointForUnpackedAPI(); + if (unpacked_api) { + if (interface_api == "c") { + GenerateCInterfaceEntrypoint(); + } else { + GenerateEntrypointForUnpackedAPI(); + } } else { + ICHECK_EQ(interface_api, "packed") << "Packed interface required for packed operators"; GenerateEntrypointForPackedAPI(); } - code_ << "const tvm_model_t network = {\n" - << " .run_func = &" << ::tvm::runtime::symbol::tvm_module_main << ",\n" - << " .num_input_tensors = " << metadata_->num_inputs << ",\n" - << " .num_output_tensors = " << metadata_->num_outputs << ", \n" - << "};\n"; } void CreateSource() { diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index b9d9706773f7c..bcf1b3fb17ed9 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -299,6 +299,7 @@ TVM_REGISTER_TARGET_KIND("llvm", kDLCPU) .add_attr_option("runtime") .add_attr_option("link-params", Bool(false)) .add_attr_option("unpacked-api") + .add_attr_option("interface-api") .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("c", kDLCPU) @@ -310,6 +311,7 @@ TVM_REGISTER_TARGET_KIND("c", kDLCPU) .add_attr_option("executor") .add_attr_option("workspace-byte-alignment") .add_attr_option("unpacked-api") + .add_attr_option("interface-api") .set_default_keys({"cpu"}); TVM_REGISTER_TARGET_KIND("cuda", kDLCUDA) diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index afdbdc590de0e..29854b21d7485 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -34,6 +34,7 @@ from tvm.micro.contrib import zephyr from tvm.contrib import utils from tvm.contrib.download import 
+from tvm.micro.interface_api import generate_c_interface_header
 
 import conftest
 
@@ -181,7 +182,9 @@ def test_tflite(platform, west_cmd, skip_build, tvm_debug):
         tflite_model, shape_dict={"input_1": input_shape}, dtype_dict={"input_1": "float32"}
     )
 
-    target = tvm.target.target.micro(model, options=["-link-params=1", "--executor=aot"])
+    target = tvm.target.target.micro(
+        model, options=["-link-params=1", "--executor=aot", "--unpacked-api=1", "--interface-api=c"]
+    )
     with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
         lowered = relay.build(relay_mod, target, params=params)
 
@@ -192,6 +195,7 @@ def test_tflite(platform, west_cmd, skip_build, tvm_debug):
     )
     sample = np.load(sample_path)
     model_files_path = os.path.join(runtime_path, "include")
+    generate_c_interface_header(lowered.libmod_name, ["input_1"], ["output"], model_files_path)
     _create_header_file((f"input_data"), sample, model_files_path)
     _create_header_file(
         "output_data", np.zeros(shape=output_shape, dtype="float32"), model_files_path
diff --git a/tests/python/relay/aot/aot_test.mk b/tests/python/relay/aot/aot_test.mk
index 793a8b1ea69a9..23d90e38974be 100644
--- a/tests/python/relay/aot/aot_test.mk
+++ b/tests/python/relay/aot/aot_test.mk
@@ -35,7 +35,8 @@ PKG_CFLAGS = ${PKG_COMPILE_OPTS} \
 	-I$(DMLC_CORE)/include \
 	-I$(TVM_ROOT)/3rdparty/dlpack/include \
 	-I$(AOT_ROOT)\
-	-I$(build_dir)
+	-I$(build_dir) \
+	-I$(CODEGEN_ROOT)/host/include
 
 $(ifeq VERBOSE,1)
 QUIET ?=
diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py
index a54ffb80f0515..34ed7651f83de 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/tests/python/relay/aot/aot_test_utils.py
@@ -15,27 +15,43 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import enum
 import os
-import io
-import struct
+import itertools
 import numpy as np
 import pathlib
-import shutil
+import pytest
 import subprocess
-import tempfile
 import tarfile
 import json
-
 import tvm
 from tvm import relay
-from tvm.relay import transform
 from tvm.contrib import utils, graph_executor
 from tvm.relay.backend import compile_engine
-from tvm.contrib import utils
 from tvm.micro import export_model_library_format
 
 
+def parametrize_aot_options(test):
+    """Parametrize over valid option combinations"""
+
+    interface_api = ["packed", "c"]
+    use_unpacked_api = [True, False]
+    use_calculated_workspaces = [True, False]
+
+    all_combinations = itertools.product(interface_api, use_unpacked_api, use_calculated_workspaces)
+    # Filter out packed operators with c interface
+    valid_combinations = filter(
+        lambda parameters: not (parameters[0] == "c" and parameters[1] == False),
+        all_combinations,
+    )
+
+    return pytest.mark.parametrize(
+        ["interface_api", "use_unpacked_api", "use_calculated_workspaces"],
+        valid_combinations,
+    )(test)
+
+
 def subprocess_with_stdout_and_log(cmd, cwd, logfile, stdout):
     """
     This method runs a process and logs the output to both a log file and stdout
@@ -56,28 +72,12 @@ def subprocess_with_stdout_and_log(cmd, cwd, logfile, stdout):
             print(text, end="")
 
 
-def create_main(test_name, input_list, output_list, output_path, workspace_bytes):
-    file_path = pathlib.Path(f"{output_path}/" + test_name).resolve()
-    # create header file
-    raw_path = file_path.with_suffix(".c").resolve()
-    with open(raw_path, "w") as main_file:
-        main_file.write("#include <stdio.h>\n")
-        main_file.write("#include <math.h>\n")
-        main_file.write('#include "tvm/runtime/crt/internal/aot_executor/aot_executor.h"\n')
-        main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n')
-        main_file.write(f"#define WORKSPACE_SIZE ({workspace_bytes})\n")
-        main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
-
-        for i in range(0, len(input_list)):
-            main_file.write('#include "input_data%i.h"\n' % i)
-        for i in range(0, len(output_list)):
-            main_file.write('#include "expected_output_data%i.h"\n' % i)
-            main_file.write('#include "output_data%i.h"\n' % i)
-
-        main_file.write("extern tvm_model_t network;\n")
-        main_file.write("tvm_workspace_t app_workspace;\n")
-        main_file.write(
-            """
+def emit_main_prologue(main_file, workspace_bytes):
+    main_file.write(f"#define WORKSPACE_SIZE ({workspace_bytes})\n")
+    main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
+    main_file.write("tvm_workspace_t app_workspace;\n")
+    main_file.write(
+        """
 tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
   return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
 }
@@ -91,48 +91,163 @@
 void TVMLogf(const char* msg, ...) { }
 TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
+int main(){\n
 """
+    )
+
+
+def emit_main_data(main_file, input_map, output_list):
+    for index, _ in enumerate(input_map):
+        main_file.write(f'#include "input_data{index}.h"\n')
+
+    for i in range(0, len(output_list)):
+        main_file.write(f'#include "expected_output_data{i}.h"\n')
+        main_file.write(f'#include "output_data{i}.h"\n')
+
+
+def emit_main_data_structs(main_file, input_map, output_list):
+    main_file.write(f"struct tvm_default_inputs inputs = {{")
+    for index, key in enumerate(input_map):
+        main_file.write(f"\t.{key} = input_data{index},\n")
+    main_file.write("};\n")
+
+    main_file.write(f"struct tvm_default_outputs outputs = {{")
+    num_outputs = len(output_list)
+    if num_outputs == 1:
+        main_file.write(f"\t.output = output_data0,\n")
+    else:
+        for i in range(0, num_outputs):
+            main_file.write(f"\t.output{i} = output_data{i},\n")
+    main_file.write("};\n")
+
+
+def emit_main_data_setup(main_file, input_map, output_list):
+    num_outputs = len(output_list)
+    num_inputs = len(input_map)
+
+    main_file.write(f"void* inputs[{num_inputs}] = {{ ")
+
+    for i in range(0, num_inputs):
+        main_file.write(f"input_data{i}, ")
+    main_file.write("};\n")
+
+    main_file.write(f"void* outputs[{num_outputs}] = {{ ")
+    for i in range(0, num_outputs):
+        main_file.write(f"output_data{i}, ")
+    main_file.write("};\n")
+
+
+def emit_main_c_interface_call(main_file):
+    main_file.write("tvm_default_run(&inputs, &outputs, NULL, NULL);\n")
+
+
+def emit_main_packed_call(main_file, input_map, output_list):
+    def fake_tensor(source, source_index, packed_index):
+        main_file.write(
+            f"""
+        tensors[{packed_index}].device = fake_device;
+        tensors[{packed_index}].data = {source}[{source_index}];
+        tensors[{packed_index}].shape = &fake_shape;
+        tensors[{packed_index}].ndim = fake_dims;
+        tensors[{packed_index}].byte_offset = 0;
+        tensors[{packed_index}].strides = NULL;
+        tvm_values[{packed_index}].v_handle = &tensors[{packed_index}];
+        """
         )
-        main_file.write("int main(){\n")
-        main_file.write("void* inputs[%i] = { " % (len(input_list)))
-
-        for i in range(0, len(input_list)):
-            main_file.write("input_data%i, " % i)
-        main_file.write("};\n")
-
-        main_file.write("void* outputs[%i] = { " % (len(output_list)))
-        for i in range(0, len(output_list)):
-            main_file.write("output_data%i, " % i)
-        main_file.write("};\n")
-
-        main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);")
-        main_file.write("tvm_runtime_run(&network, inputs, outputs);")
-
-        for i in range(0, len(output_list)):
-            is_float_dtype = output_list[i].dtype == "float32"
-            main_file.write("for (int i = 0; i<%d; i++){\n" % output_list[i].size)
-            if is_float_dtype:
-                main_file.write(
-                    'if (fabs(output_data%s[i]-expected_output_data%s[i]) > 0.001f){printf("ko\\n");return -1;}\n'
-                    % (i, i)
-                )
-            else:
-                main_file.write(
-                    'if (output_data%s[i]!=expected_output_data%s[i]){printf("ko\\n");return -1;}\n'
-                    % (i, i)
-                )
-            main_file.write("}\n")
-
-        main_file.write('printf("ok\\n");')
-        main_file.write("return 0;")
+
+    main_file.write(
+        """
+    static DLDevice fake_device = {kDLCPU, 0};
+    static int64_t fake_dims = 0;
+    static int64_t fake_shape = {0};
+    """
+    )
+
+    num_outputs = len(output_list)
+    num_inputs = len(input_map)
+    num_tensors = num_inputs + num_outputs
+    main_file.write(
+        f"""
+    DLTensor tensors[{num_tensors}];
+    TVMValue tvm_values[{num_tensors}];
+    int32_t tvm_typeids[{num_tensors}];
+    """
+    )
+
+    for i in range(0, num_inputs):
+        fake_tensor("inputs", i, i)
+    for i in range(0, num_outputs):
+        fake_tensor("outputs", i, i + num_inputs)
+
+    main_file.write("__tvm_main__(tvm_values, tvm_typeids, 0, NULL, 0, NULL);\n")
main_file.write("__tvm_main__(tvm_values, tvm_typeids, 0, NULL, 0, NULL);\n") + + +def emit_main_compare(main_file, output_list): + for i in range(0, len(output_list)): + is_float_dtype = output_list[i].dtype == "float32" + main_file.write(f"for (int i = 0; i 0.001f){{printf("ko\\n");return -1;}}\n' + ) + else: + main_file.write( + f'if (output_data{i}[i]!=expected_output_data{i}[i]){{printf("ko\\n");return -1;}}\n' + ) main_file.write("}\n") +def emit_main_init_memory_manager(main_file): + main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);") + + +def emit_main_epilogue(main_file): + main_file.write('printf("ok\\n");') + main_file.write("return 0;") + main_file.write("}\n") + + +def emit_main_common_includes(main_file): + main_file.write("#include \n") + main_file.write("#include \n") + main_file.write('#include "tvm/runtime/c_runtime_api.h"\n') + main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n') + + +def emit_main_micro_include(main_file): + main_file.write("#include \n") + + +def create_main(test_name, input_map, output_list_map, output_path, interface_api, workspace_bytes): + file_path = pathlib.Path(f"{output_path}/" + test_name).resolve() + # create header file + raw_path = file_path.with_suffix(".c").resolve() + with open(raw_path, "w") as main_file: + emit_main_common_includes(main_file) + + if interface_api == "c": + emit_main_micro_include(main_file) + + emit_main_prologue(main_file, workspace_bytes) + emit_main_data(main_file, input_map, output_list_map) + emit_main_init_memory_manager(main_file) + + if interface_api == "c": + emit_main_data_structs(main_file, input_map, output_list_map) + emit_main_c_interface_call(main_file) + else: + emit_main_data_setup(main_file, input_map, output_list_map) + emit_main_packed_call(main_file, input_map, output_list_map) + + emit_main_compare(main_file, output_list_map) + emit_main_epilogue(main_file) + + def create_header_file(tensor_name, npy_data, output_path): """ This method generates a header file containing the data contained in the numpy array provided. - It is used to capture the tensor data (for both inputs and expected outputs) to be bundled into the standalone ethosu_test_runner. + It is used to capture the tensor data (for both inputs and expected outputs) to be bundled into the standalone application. 
""" file_path = pathlib.Path(f"{output_path}/" + tensor_name).resolve() # create header file @@ -166,9 +281,10 @@ def extract_main_workspace_sizebytes(extract_dir): def compile_and_run( mod, - input_list, + inputs, output_list, - target_options, + interface_api, + use_unpacked_api, use_calculated_workspaces, params=None, workspace_byte_alignment=8, @@ -176,7 +292,9 @@ def compile_and_run( """ This method verifies the generated source """ - target = f"c -runtime=c --link-params --executor=aot --workspace-byte-alignment={workspace_byte_alignment} {target_options}" + base_target = "c -runtime=c --link-params --executor=aot" + extra_target = f"--workspace-byte-alignment={workspace_byte_alignment} --interface-api={interface_api} --unpacked-api={int(use_unpacked_api)}" + target = f"{base_target} {extra_target}" cflags = f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} " # The calculated workspaces will not account for stack allocator tags used for debugging @@ -202,8 +320,8 @@ def compile_and_run( else: workspace_bytes = 16384 * 1024 - for i in range(len(input_list)): - create_header_file((f"input_data{i}"), input_list[i], build_path) + for index, key in enumerate(inputs): + create_header_file((f"input_data{index}"), inputs[key], build_path) for i in range(len(output_list)): create_header_file( @@ -213,15 +331,17 @@ def compile_and_run( ) create_header_file((f"expected_output_data{i}"), output_list[i], build_path) - create_main("test.c", input_list, output_list, build_path, workspace_bytes) + create_main("test.c", inputs, output_list, build_path, interface_api, workspace_bytes) # Verify that compiles fine file_dir = os.path.dirname(os.path.abspath(__file__)) + codegen_path = os.path.join(base_path, "codegen") makefile = os.path.join(file_dir, "aot_test.mk") make_cmd = ( f"make CFLAGS='{cflags}' -f {makefile} build_dir=" + build_path + f" TVM_ROOT={file_dir}/../../../.." + + f" CODEGEN_ROOT={codegen_path}" ) compile_log_path = os.path.join(build_path, "test_compile.log") diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py index 4f8de450d9f18..1ced65e979c16 100644 --- a/tests/python/relay/aot/test_crt_aot.py +++ b/tests/python/relay/aot/test_crt_aot.py @@ -15,37 +15,42 @@ # specific language governing permissions and limitations # under the License. 
-import os -import io -import struct import numpy as np -import pathlib -import shutil -import subprocess -import tempfile -import tarfile import pytest +from collections import OrderedDict import tvm from tvm import relay -from tvm.relay import transform -from tvm.relay.op.contrib import get_pattern_table -from tvm.contrib import utils -from tvm.relay.backend import compile_engine -from tvm.contrib import utils -from tvm.contrib import graph_executor -from tvm.micro import export_model_library_format -from tvm.relay import testing +from tvm.relay import testing, transform from tvm.relay.op.annotation import compiler_begin, compiler_end -from tvm.contrib import utils from tvm.relay.expr_functor import ExprMutator -from aot_test_utils import * +from aot_test_utils import generate_ref_data, compile_and_run, parametrize_aot_options -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_conv_with_params(use_calculated_workspaces, target_options): +def test_error_c_interface_with_packed_api(): + interface_api = "c" + use_unpacked_api = False + use_calculated_workspaces = True + + two = relay.add(relay.const(1), relay.const(1)) + func = relay.Function([], two) + output_list = generate_ref_data(func, {}) + input_list = [] + + with pytest.raises(tvm.TVMError, match="Packed interface required for packed operators"): + compile_and_run( + func, + input_list, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) + + +@parametrize_aot_options +def test_conv_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): RELAY_MODEL = """ #[version = "0.0.5"] def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) { @@ -73,13 +78,19 @@ def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), inputs = {"data": input_data} output_list = generate_ref_data(mod, inputs, params) - input_list = [input_data] - compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces, params) + compile_and_run( + mod, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + params, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_add_with_params(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_add_with_params(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", shape=(1, 10)) y = relay.var("y", shape=(1, 10)) z = relay.add(x, y) @@ -92,15 +103,19 @@ def test_add_with_params(use_calculated_workspaces, target_options): inputs = {"y": y_in} output_list = generate_ref_data(func, inputs, params) - input_list = [y_in] compile_and_run( - func, input_list, output_list, target_options, use_calculated_workspaces, params + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + params, ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_conv2d(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api): """Test a subgraph with a single conv2d operator.""" def conv2d_direct(): @@ -119,7 +134,8 @@ def conv2d_direct(): i_data = np.random.uniform(0, 1, 
ishape).astype(dtype) w1_data = np.random.uniform(0, 1, w1shape).astype(dtype) - return mod, {"data": i_data, "weight": w1_data}, (1, 32, 14, 14) + inputs = OrderedDict([("data", i_data), ("weight", w1_data)]) + return mod, inputs, (1, 32, 14, 14) def group_conv2d(): dtype = "float32" @@ -137,17 +153,23 @@ def group_conv2d(): i_data = np.random.uniform(0, 1, ishape).astype(dtype) w_data = np.random.uniform(0, 1, w2shape).astype(dtype) - return mod, {"data": i_data, "weight": w_data}, (1, 32, 14, 14) + inputs = OrderedDict([("data", i_data), ("weight", w_data)]) + return mod, inputs, (1, 32, 14, 14) for mod, inputs, out_shape in [conv2d_direct(), group_conv2d()]: output_list = generate_ref_data(mod, inputs) - input_list = [inputs["data"], inputs["weight"]] - compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces) - - -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_concatenate(use_calculated_workspaces, target_options): + compile_and_run( + mod, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) + + +@parametrize_aot_options +def test_concatenate(interface_api, use_unpacked_api, use_calculated_workspaces): dtype = "float32" x = relay.var("x", shape=(10, 5), dtype=dtype) y = relay.var("y", shape=(10, 5), dtype=dtype) @@ -159,16 +181,21 @@ def test_concatenate(use_calculated_workspaces, target_options): x_data = np.random.rand(10, 5).astype(dtype) y_data = np.random.rand(10, 5).astype(dtype) t_data = np.random.uniform(size=()).astype(dtype) - inputs = {"x": x_data, "y": y_data, "z": t_data} + inputs = OrderedDict([("x", x_data), ("y", y_data), ("z", t_data)]) output_list = generate_ref_data(func, inputs) - input_list = [inputs["x"], inputs["y"], inputs["z"]] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_nested_tuples(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_nested_tuples(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", shape=(10,)) x1 = x + relay.const(1.0) x2 = x1 + relay.const(1.0) @@ -180,71 +207,109 @@ def test_nested_tuples(use_calculated_workspaces, target_options): x_data = np.random.uniform(size=(10,)).astype(np.float32) inputs = {"x": x_data} output_list = generate_ref_data(func, inputs) - input_list = [x_data] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_tuple_getitem(use_calculated_workspaces, target_options): + +@parametrize_aot_options +def test_tuple_getitem(interface_api, use_unpacked_api, use_calculated_workspaces): func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0)) output_list = generate_ref_data(func, {}) - input_list = [] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + 
inputs = {} + + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_id(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_id(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", "float32") ident = relay.Function([x], x) one = np.array(1.0, "float32") inputs = {"x": one} output_list = generate_ref_data(ident, inputs) - input_list = [one] - compile_and_run(ident, input_list, output_list, target_options, use_calculated_workspaces) + + compile_and_run( + ident, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_add_const(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces): two = relay.add(relay.const(1), relay.const(1)) func = relay.Function([], two) output_list = generate_ref_data(func, {}) - input_list = [] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + inputs = {} + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_mul_param(use_calculated_workspaces, target_options): + +@parametrize_aot_options +def test_mul_param(interface_api, use_unpacked_api, use_calculated_workspaces): x = relay.var("x", shape=(10, 10)) y = relay.var("y", shape=(1, 10)) func = relay.Function([x, y], relay.multiply(x, y)) x_data = np.random.rand(10, 10).astype("float32") y_data = np.random.rand(1, 10).astype("float32") - inputs = {"x": x_data, "y": y_data} + + inputs = OrderedDict([("x", x_data), ("y", y_data)]) output_list = generate_ref_data(func, inputs) - input_list = [inputs["x"], inputs["y"]] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_subtract(use_calculated_workspaces, target_options): +@parametrize_aot_options +def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces): i = relay.var("i", shape=[], dtype="int32") sub = relay.subtract(i, relay.const(1, dtype="int32")) func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32")) i_data = np.array(1, dtype="int32") inputs = {"i": i_data} output_list = generate_ref_data(func, inputs) - input_list = [inputs["i"]] - compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces) + compile_and_run( + func, + inputs, + output_list, + interface_api, + use_unpacked_api, + use_calculated_workspaces, + ) -@pytest.mark.parametrize("use_calculated_workspaces", [True, False]) -@pytest.mark.parametrize("target_options", ["--unpacked-api=0", "--unpacked-api=1"]) -def test_tuple_output(use_calculated_workspaces, 
target_options):
+def test_tuple_output(interface_api, use_unpacked_api, use_calculated_workspaces):
     x = relay.var("x", shape=(6, 9))
     y = relay.split(x, 3).astuple()
     a = relay.TupleGetItem(y, 0)
@@ -255,29 +320,34 @@
     x_data = np.random.rand(6, 9).astype("float32")
     inputs = {"x": x_data}
     output_list = generate_ref_data(func, inputs)
-    input_list = [inputs["x"]]
-    compile_and_run(func, input_list, output_list, target_options, use_calculated_workspaces)
+
+    compile_and_run(
+        func,
+        inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
 @pytest.mark.parametrize(
-    "use_calculated_workspaces_and_alignment", [(True, 1), (True, 16), (False, 1)]
+    ["use_calculated_workspaces", "workspace_byte_alignment"], [(True, 1), (True, 16), (False, 1)]
 )
-@pytest.mark.parametrize("target_options", ["--unpacked-api"])
-def test_mobilenet(use_calculated_workspaces_and_alignment, target_options):
-    use_calculated_workspaces = use_calculated_workspaces_and_alignment[0]
-    workspace_byte_alignment = use_calculated_workspaces_and_alignment[1]
+def test_mobilenet(use_calculated_workspaces, workspace_byte_alignment):
+    use_unpacked_api = True
+    interface_api = "c"
 
     mod, params = testing.mobilenet.get_workload(batch_size=1)
     data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
     data = np.random.uniform(size=data_shape).astype("float32")
     inputs = {"data": data}
     output_list = generate_ref_data(mod, inputs, params)
-    input_list = [inputs["data"]]
 
     compile_and_run(
         mod,
-        input_list,
+        inputs,
         output_list,
-        target_options,
+        interface_api,
+        use_unpacked_api,
         use_calculated_workspaces,
         params,
         workspace_byte_alignment,
@@ -339,9 +409,11 @@ def visit_call(self, call):
 
 
 @pytest.mark.parametrize("use_calculated_workspaces", [True, False])
-@pytest.mark.parametrize("target_options", [""])
-def test_byoc_utvm(use_calculated_workspaces, target_options):
+def test_byoc_utvm(use_calculated_workspaces):
     """This is a simple test case to check BYOC capabilities of AOT"""
+    use_unpacked_api = False
+    interface_api = "packed"
+
     x = relay.var("x", shape=(10, 10))
     w0 = relay.var("w0", shape=(10, 10))
     w1 = relay.var("w1", shape=(10, 10))
@@ -378,12 +450,16 @@ def test_byoc_utvm(use_calculated_workspaces, target_options):
     for _ in range(8):
         w_data.append(np.random.rand(10, 10).astype("float32"))
 
-    map_inputs = {"w{}".format(i): w_data[i] for i in range(8)}
-    map_inputs["x"] = x_data
+    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])
     output_list = generate_ref_data(mod, map_inputs)
-    input_list = [map_inputs["x"]]
-    input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
-    compile_and_run(mod, input_list, output_list, target_options, use_calculated_workspaces)
+    compile_and_run(
+        mod,
+        map_inputs,
+        output_list,
+        interface_api,
+        use_unpacked_api,
+        use_calculated_workspaces,
+    )
 
 
 if __name__ == "__main__":
diff --git a/tests/python/unittest/test_micro_model_library_format.py b/tests/python/unittest/test_micro_model_library_format.py
index d2c519da22b5a..874e245aa6524 100644
--- a/tests/python/unittest/test_micro_model_library_format.py
+++ b/tests/python/unittest/test_micro_model_library_format.py
@@ -46,14 +46,20 @@ def validate_graph_json(extract_dir, factory):
 
 @tvm.testing.requires_micro
 @pytest.mark.parametrize(
-    "target",
+    ["executor", "target", "should_generate_interface"],
     [
-        ("graph", tvm.target.target.micro("host")),
-        
("aot", tvm.target.target.micro("host", options="-executor=aot")), + ("graph", tvm.target.target.micro("host"), False), + ("aot", tvm.target.target.micro("host", options="-executor=aot"), False), + ( + "aot", + tvm.target.target.micro( + "host", options="-executor=aot --unpacked-api=1 --interface-api=c" + ), + True, + ), ], ) -def test_export_model_library_format_c(target): - executor, _target = target +def test_export_model_library_format_c(executor, target, should_generate_interface): with utils.TempDirectory.set_keep_for_debug(True): with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): relay_mod = tvm.parser.fromtext( @@ -66,8 +72,8 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ ) factory = tvm.relay.build( relay_mod, - _target, - target_host=_target, + target, + target_host=target, mod_name="add", params={"c": numpy.array([[2.0, 4.0]], dtype="float32")}, ) @@ -91,7 +97,7 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" ) assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["target"] == {"1": str(_target)} + assert metadata["target"] == {"1": str(target)} if executor == "graph": assert metadata["memory"]["sids"] == [ {"storage_id": 0, "size_bytes": 2, "input_binding": "a"}, @@ -117,6 +123,9 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib0.c")) assert os.path.exists(os.path.join(extract_dir, "codegen", "host", "src", "lib1.c")) + assert should_generate_interface == os.path.exists( + os.path.join(extract_dir, "codegen", "host", "include", "tvm_add.h") + ) if executor == "graph": validate_graph_json(extract_dir, factory) @@ -208,14 +217,13 @@ def @main(%a : Tensor[(1, 2), uint8], %b : Tensor[(1, 2), float32], %c : Tensor[ @tvm.testing.requires_micro @pytest.mark.parametrize( - "target", + ["target"], [ - ("graph", tvm.target.target.micro("host")), - ("aot", tvm.target.target.micro("host", options="-executor=aot")), + (tvm.target.target.micro("host"),), + (tvm.target.target.micro("host", options="-executor=aot"),), ], ) def test_export_model_library_format_workspace(target): - executor, _target = target with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}): relay_mod = tvm.parser.fromtext( """ @@ -229,7 +237,7 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 } """ ) - factory = tvm.relay.build(relay_mod, _target, target_host=_target, mod_name="qnn_conv2d") + factory = tvm.relay.build(relay_mod, target, target_host=target, mod_name="qnn_conv2d") temp_dir = utils.tempdir() mlf_tar_path = temp_dir.relpath("lib.tar") @@ -250,7 +258,7 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 metadata["export_datetime"], "%Y-%m-%d %H:%M:%SZ" ) assert (datetime.datetime.now() - export_datetime) < datetime.timedelta(seconds=60 * 5) - assert metadata["target"] == {"1": str(_target)} + assert metadata["target"] == {"1": str(target)} assert metadata["memory"]["functions"]["main"] == [ { "constants_size_bytes": 0, @@ -271,9 +279,6 @@ def @main(%p0: Tensor[(1, 56, 56, 128), int16], %p1: Tensor[(3, 3, 128, 1), int1 @tvm.testing.requires_micro def test_export_model(): module = tvm.support.FrontendTestModule() - factory = executor_factory.GraphExecutorFactoryModule( - None, 
tvm.target.target.micro("host"), '"graph_json"', module, "test_module", {}, {} - ) temp_dir = utils.tempdir() import tvm.micro as micro