[ETHOSN] Update driver stack version to 22.11 #13637

Merged · 1 commit · Dec 20, 2022
16 changes: 1 addition & 15 deletions cmake/utils/FindEthosN.cmake
@@ -58,18 +58,6 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport)

list(GET ETHOSN_INCLUDE_DIRS 0 filename)
set(filename "${filename}/ethosn_support_library/Support.hpp")
file(READ ${filename} ETHOSN_SUPPORT_H)
string(REGEX MATCH "VERSION_MAJOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_major ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_MINOR ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_minor ${CMAKE_MATCH_1})
string(REGEX MATCH "VERSION_PATCH ([0-9]*)" _ ${ETHOSN_SUPPORT_H})
set(ver_patch ${CMAKE_MATCH_1})
set(ETHOSN_PACKAGE_VERSION "${ver_major}.${ver_minor}.${ver_patch}")
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})

# Runtime hardware support. Driver library also needed for
# test support.
find_path(_DL_DIR NAMES Network.hpp
@@ -81,9 +69,7 @@ macro(find_ethosn use_ethosn)
PATHS ${__ethosn_stack}/lib)
find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver)
if(${USE_ETHOSN_HW} MATCHES ${IS_TRUE_PATTERN})
set(ETHOSN_DEFINITIONS -DETHOSN_HW -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
else()
set(ETHOSN_DEFINITIONS -DETHOSN_API_VERSION=${USE_ETHOSN_API_VERSION})
set(ETHOSN_DEFINITIONS -DETHOSN_HW)
endif()

if(ETHOSN_COMPILER_LIBRARY)
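Reviewer note: the version string no longer needs to be scraped out of Support.hpp here because the support library's own version macros are now checked at compile time in ethosn_runtime.h (later in this diff). A minimal sketch of the assumption this relies on follows; the macro names come from the new check, and the values shown are illustrative only.

// Hypothetical excerpt: what ethosn_support_library/Support.hpp is assumed to
// provide in the 22.11 release. The macro names are taken from the new guard in
// ethosn_runtime.h; the values shown are illustrative.
#define ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR 3
#define ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR 2
#define ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH 0

With these macros visible to the compiler, CMake no longer has to regex the header and pass -DETHOSN_API_VERSION on the command line.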
2 changes: 1 addition & 1 deletion docker/install/ubuntu_install_ethosn_driver_stack.sh
@@ -22,7 +22,7 @@ set -o pipefail

repo_url="https://github.com/Arm-software/ethos-n-driver-stack"
repo_dir="ethosn-driver"
repo_revision="22.08"
repo_revision="22.11"
install_path="/opt/arm/$repo_dir"

tmpdir=$(mktemp -d)
2 changes: 1 addition & 1 deletion python/tvm/relay/op/contrib/ethosn.py
@@ -117,7 +117,7 @@ def partition_for_ethosn(mod, params=None, **opts):
ret : annotated and partitioned module.
"""
api_version = ethosn_api_version()
supported_api_versions = ["3.1.0"]
supported_api_versions = ["3.2.0", "3.1.0"]
if all(api_version != LooseVersion(exp_ver) for exp_ver in supported_api_versions):
raise ValueError(
f"Driver stack version {api_version} is unsupported. "
89 changes: 85 additions & 4 deletions src/runtime/contrib/ethosn/ethosn_device.cc
@@ -42,6 +42,9 @@

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
@@ -87,6 +90,81 @@ InferenceWaitStatus WaitForInference(dl::Inference* inference, int timeout) {
return InferenceWaitStatus(InferenceWaitErrorCode::kSuccess);
}

#ifdef _ETHOSN_API_VERSION_3_2_0
void CreateBuffers(dl::ProcMemAllocator* proc_mem_alloc,
std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
for (size_t i = 0; i < tensors.size(); i++) {
auto* data = static_cast<uint8_t*>(tensors[i]->data);
if (input) {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(data, tensor_sizes[i], dl::DataFormat::NHWC));
} else {
(*fm)[i] = std::make_shared<dl::Buffer>(
proc_mem_alloc->CreateBuffer(tensor_sizes[i], dl::DataFormat::NHWC));
}
}
}

bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
// Unpack parameters
size_t n_inputs = input_order.size();
size_t n_outputs = output_order.size();
std::vector<DLTensor*> inputs(n_inputs);
for (size_t i = 0; i < n_inputs; i++) {
inputs[i] = args[input_order[i]];
}
std::vector<DLTensor*> outputs(n_outputs);
size_t output_offset = n_inputs;
for (size_t i = 0; i < n_outputs; i++) {
outputs[i] = args[output_order[i] + output_offset];
}

// Set up input buffers
std::vector<std::shared_ptr<dl::Buffer>> ifm(n_inputs);
CreateBuffers(proc_mem_alloc, &ifm, inputs, input_sizes, true);

// Set up output buffers
std::vector<std::shared_ptr<dl::Buffer>> ofm(n_outputs);
CreateBuffers(proc_mem_alloc, &ofm, outputs, output_sizes, false);

// Raw pointers for the inference
dl::Buffer* ifm_raw[n_inputs];
for (size_t i = 0; i < n_inputs; i++) {
ifm_raw[i] = ifm[i].get();
}
dl::Buffer* ofm_raw[n_outputs];
for (size_t i = 0; i < n_outputs; i++) {
ofm_raw[i] = ofm[i].get();
}

// Execute the inference.
std::unique_ptr<dl::Inference> inference(
npu->ScheduleInference(ifm_raw, n_inputs, ofm_raw, n_outputs));
InferenceWaitStatus result = WaitForInference(inference.get(), 60);

if (result.GetErrorCode() != InferenceWaitErrorCode::kSuccess) {
LOG(FATAL) << "An error has occurred waiting for the inference of a sub-graph on the NPU: "
<< result.GetErrorDescription();
}

for (size_t i = 0; i < n_outputs; i++) {
DLTensor* tensor = outputs[i];
dl::Buffer* source_buffer = ofm_raw[i];
uint8_t* dest_buffer = static_cast<uint8_t*>(tensor->data);
size_t size = source_buffer->GetSize();
uint8_t* source_buffer_data = source_buffer->Map();
std::copy(source_buffer_data, source_buffer_data + size, dest_buffer);
source_buffer->Unmap();
}

return true;
}
#else
void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer>>* fm,
const std::vector<DLTensor*>& tensors, const std::vector<uint32_t>& tensor_sizes,
bool input) {
@@ -157,7 +235,7 @@ bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,

return true;
}

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
@@ -192,9 +270,12 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result")
});

// Allow the ethos-n support code to be tested without a device
bool Inference(tvm::runtime::TVMArgs args, dl::Network* /* npu */,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes,
bool Inference(tvm::runtime::TVMArgs args,
#ifdef _ETHOSN_API_VERSION_3_2_0
dl::ProcMemAllocator* proc_mem_alloc,
#endif
dl::Network* /* npu */, const std::vector<uint32_t>& input_order,
const std::vector<uint32_t>& output_order, const std::vector<uint32_t>& input_sizes,
const std::vector<uint32_t>& output_sizes) {
std::vector<DLTensor*> outputs;
for (int argc = input_order.size(); argc < args.size(); argc++) {
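Reviewer note: for anyone not yet familiar with the 22.11 driver library, here is a minimal standalone sketch of the buffer/inference lifecycle the new 3.2.0 path above depends on. It only uses calls that appear in this diff (CreateNetwork, CreateBuffer, ScheduleInference, Map/Unmap/GetSize); the function name RunOnce, the single-input/single-output shape, and the omission of error handling are illustrative, not TVM code.

#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>

#include "ethosn_driver_library/Inference.hpp"
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_driver_library/ProcMemAllocator.hpp"

namespace dl = ::ethosn::driver_library;

// Runs one inference for a network with a single NHWC input and a single NHWC output.
void RunOnce(const std::string& cmm, uint8_t* in_data, uint32_t in_size,
             uint8_t* out_data, uint32_t out_size) {
  // As of 22.11, device memory is owned by a per-process allocator.
  dl::ProcMemAllocator proc_mem_alloc;
  dl::Network network = proc_mem_alloc.CreateNetwork(cmm.c_str(), cmm.size());

  // Input buffers are created pre-filled from user data, output buffers empty.
  dl::Buffer ifm = proc_mem_alloc.CreateBuffer(in_data, in_size, dl::DataFormat::NHWC);
  dl::Buffer ofm = proc_mem_alloc.CreateBuffer(out_size, dl::DataFormat::NHWC);

  dl::Buffer* ifms[] = {&ifm};
  dl::Buffer* ofms[] = {&ofm};
  std::unique_ptr<dl::Inference> inference(network.ScheduleInference(ifms, 1, ofms, 1));
  // ... block until completion, e.g. with the WaitForInference() helper above ...

  // Results are copied out through an explicit Map()/Unmap() pair.
  uint8_t* mapped = ofm.Map();
  std::copy(mapped, mapped + ofm.GetSize(), out_data);
  ofm.Unmap();
}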
7 changes: 6 additions & 1 deletion src/runtime/contrib/ethosn/ethosn_device.h
@@ -38,10 +38,15 @@ namespace dl = ::ethosn::driver_library;

using tvm::runtime::TVMArgs;

#ifdef _ETHOSN_API_VERSION_3_2_0
bool Inference(tvm::runtime::TVMArgs args, dl::ProcMemAllocator* proc_mem_alloc, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);
#else
bool Inference(tvm::runtime::TVMArgs args, dl::Network* npu,
const std::vector<uint32_t>& input_order, const std::vector<uint32_t>& output_order,
const std::vector<uint32_t>& input_sizes, const std::vector<uint32_t>& output_sizes);

#endif
} // namespace ethosn
} // namespace runtime
} // namespace tvm
20 changes: 20 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.cc
@@ -53,6 +53,11 @@ EthosnModule::EthosnModule(std::vector<OrderedCompiledNetwork>* cmms) {
if (it.compiled_cmm != nullptr) {
network_map_[it.name].compiled_cmm = std::move(it.compiled_cmm);
}
#ifdef _ETHOSN_API_VERSION_3_2_0
if (it.proc_mem_alloc != nullptr) {
network_map_[it.name].proc_mem_alloc = std::move(it.proc_mem_alloc);
}
#endif
if (it.runtime_cmm != nullptr) {
network_map_[it.name].runtime_cmm = std::move(it.runtime_cmm);
}
@@ -67,9 +72,16 @@ PackedFunc EthosnModule::GetFunction(const std::string& name,
const ObjectPtr<Object>& sptr_to_self) {
if (network_map_.find(name) != network_map_.end()) {
return PackedFunc([sptr_to_self, this, name](TVMArgs args, TVMRetValue* rv) {
#ifdef _ETHOSN_API_VERSION_3_2_0
*rv = Inference(args, network_map_[name].proc_mem_alloc.get(),
network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#else
*rv = Inference(args, network_map_[name].runtime_cmm.get(), network_map_[name].inputs,
network_map_[name].outputs, network_map_[name].input_sizes,
network_map_[name].output_sizes);
#endif
});
} else {
return PackedFunc();
@@ -102,6 +114,9 @@ Module EthosnModule::LoadFromBinary(void* strm) {
cmms.resize(func_count);
for (unsigned int i = 0; i < func_count; i++) {
OrderedCompiledNetwork& compiled = cmms[i];
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.proc_mem_alloc = std::make_unique<dl::ProcMemAllocator>();
#endif
std::string ext_symbol;
std::string cmm;
uint64_t input_size;
@@ -114,7 +129,12 @@ Module EthosnModule::LoadFromBinary(void* strm) {
#if defined ETHOSN_HW
// If hardware is unavailable, use the mock inference functionality. If hardware is
// available, deserialize the compiled graph.
#ifdef _ETHOSN_API_VERSION_3_2_0
compiled.runtime_cmm = std::make_unique<dl::Network>(
compiled.proc_mem_alloc->CreateNetwork(cmm.c_str(), cmm.size()));
#else
compiled.runtime_cmm = std::make_unique<dl::Network>(cmm.c_str(), cmm.size());
#endif
#endif
// Read the number of inputs
stream->Read<uint64_t>(&input_size);
11 changes: 11 additions & 0 deletions src/runtime/contrib/ethosn/ethosn_runtime.h
@@ -36,6 +36,14 @@
#include "ethosn_driver_library/Network.hpp"
#include "ethosn_support_library/Support.hpp"

#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
#define _ETHOSN_API_VERSION_3_2_0
#endif
#ifdef _ETHOSN_API_VERSION_3_2_0
#include "ethosn_driver_library/ProcMemAllocator.hpp"
#endif

namespace tvm {
namespace runtime {
namespace ethosn {
@@ -46,6 +54,9 @@ namespace dl = ::ethosn::driver_library;
struct OrderedCompiledNetwork {
std::unique_ptr<sl::CompiledNetwork> compiled_cmm;
std::unique_ptr<dl::Network> runtime_cmm;
#ifdef _ETHOSN_API_VERSION_3_2_0
std::unique_ptr<dl::ProcMemAllocator> proc_mem_alloc;
#endif
std::string name;
std::vector<uint32_t> inputs;
std::vector<uint32_t> outputs;
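Reviewer note: to confirm which path a given driver stack installation will compile, a throwaway check like the one below can be built against the installed headers. It assumes only that Support.hpp defines the ETHOSN_SUPPORT_LIBRARY_VERSION_* macros used by the guard above; everything else is illustrative.

// Throwaway diagnostic: compile and run against the installed driver stack headers
// to see which branch TVM will take.
#include <cstdio>

#include "ethosn_support_library/Support.hpp"

int main() {
#if ETHOSN_SUPPORT_LIBRARY_VERSION_MAJOR == 3 && ETHOSN_SUPPORT_LIBRARY_VERSION_MINOR == 2 && \
    ETHOSN_SUPPORT_LIBRARY_VERSION_PATCH == 0
  std::printf("3.2.0 support library: ProcMemAllocator code path\n");
#else
  std::printf("pre-3.2.0 support library: legacy code path\n");
#endif
  return 0;
}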
1 change: 0 additions & 1 deletion tests/python/contrib/test_ethosn/infrastructure.py
@@ -168,7 +168,6 @@ def build(
if not additional_config_args:
additional_config_args = {}
npu_config = {**get_ethosn_device_options(), **additional_config_args}
print(npu_config)
with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-n.options": npu_config}):
with tvm.target.Target("llvm"):
if npu:
6 changes: 5 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -227,7 +228,10 @@ def test_conv2d_depthwise(
)
),
}
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
if ethosn_api_version() == "3.2.0":
input_zp = np.random.randint(0, np.iinfo(dtype).max)
else:
input_zp = np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max)
input_sc = np.random.random() * 2
if qnn_per_channel:
kernel_sc = tvm.nd.array(
68 changes: 67 additions & 1 deletion tests/python/contrib/test_ethosn/test_conv2d_transpose.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn
from . import infrastructure as tei

@@ -115,7 +116,7 @@ def _get_model(
[
((1, 2, 2, 1), (2, 2), (1, 1), 1, False),
((1, 2, 2, 5), (2, 2), (3, 5), 4, False),
((1, 7, 7, 4), (2, 2), (7, 9), 8, True),
((1, 7, 7, 4), (2, 2), (7, 7), 8, True),
)
def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias):
@@ -169,6 +170,71 @@ def test_conv2d_transpose(ifm_shape, strides, kernel_size, out_channels, dtype, bias):
tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
"ifm_shape,strides,kernel_size,out_channels,bias",
[
((1, 10, 20, 3), (1, 1), (8, 5), 4, False),
((1, 10, 10, 2), (2, 2), (7, 9), 8, True),
],
)
def test_conv2d_transpose_kernel_size_gt_8(
ifm_shape, strides, kernel_size, out_channels, dtype, bias
):
"""Check transpose convolution for big kernel sizes."""
if ethosn_api_version() in ["3.2.0", "3.1.0"]:
pytest.skip("Skipping because NPU driver 22.11 fails to interpret zp used in the test.")

np.random.seed(0)

kernel_layout = "IOHW"
dilation = (1, 1)
groups = 1

iinfo = np.iinfo(dtype)
data_min = iinfo.min
data_max = iinfo.max

input_zp = np.random.randint(data_min, data_max)
input_sc = np.random.random() * 2
kernel_zp = np.random.randint(data_min, data_max)
kernel_sc = np.random.random() * 4
output_zp, output_sc = tei.get_conv2d_qnn_params(
dtype, input_zp, input_sc, kernel_zp, kernel_sc, ifm_shape[1], ifm_shape[2], ifm_shape[3]
)

model, params = _get_model(
shape=ifm_shape,
kernel_h=kernel_size[0],
kernel_w=kernel_size[1],
input_zp=input_zp,
input_sc=input_sc,
kernel_zp=kernel_zp,
kernel_sc=kernel_sc,
output_zp=output_zp,
output_sc=output_sc,
stride=strides,
dilation=dilation,
groups=groups,
kernel_layout=kernel_layout,
dtype=dtype,
out_channels=out_channels,
bias=bias,
)

outputs = []
inputs = {
"a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=ifm_shape, dtype=dtype))
}

for npu in [False, True]:
mod = tei.make_module(model, params)
outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu))

tei.verify(outputs, dtype, 1)


@requires_ethosn
@pytest.mark.parametrize("dtype", ["uint8", "int8"])
@pytest.mark.parametrize(
8 changes: 6 additions & 2 deletions tests/python/contrib/test_ethosn/test_leaky_relu.py
@@ -22,6 +22,7 @@

import tvm
from tvm import relay
from tvm.relay.op.contrib import ethosn_api_version
from tvm.testing import requires_ethosn

from . import infrastructure as tei
@@ -55,9 +56,12 @@ def test_leaky_relu(dtype, shape, alpha):
iinfo = np.iinfo(dtype)
zp_min = iinfo.min
zp_max = iinfo.max
input_zp = zp_min + 120
if ethosn_api_version() == "3.2.0":
input_zp = zp_min + 128
else:
input_zp = zp_min + 120
input_sc = 0.0068132
output_zp = zp_min + 128
output_zp = zp_min + 126 # values offset by more than 126 can cause saturation
output_sc = 0.0078125

inputs = {"x": tvm.nd.array(np.random.randint(zp_min, high=zp_max, size=shape, dtype=dtype))}
Expand Down