diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc
index e96255e976e9..f0d360ae8b6d 100644
--- a/src/relay/backend/contrib/dnnl/codegen.cc
+++ b/src/relay/backend/contrib/dnnl/codegen.cc
@@ -67,11 +67,13 @@ std::vector<std::string> Conv2d(const CallNode* call) {
     args.push_back(std::to_string(s));
   }
 
-  // Args: O, G, Ph, Pw, Kh, Kw, Sh, Sw
+  // Args: O, G, Ph0, Pw0, Ph1, Pw1, Kh, Kw, Sh, Sw
   args.push_back(std::to_string(wshape[0]));
   args.push_back(std::to_string(conv2d_attr->groups));
   args.push_back(std::to_string(conv2d_attr->padding[0].as<IntImmNode>()->value));
   args.push_back(std::to_string(conv2d_attr->padding[1].as<IntImmNode>()->value));
+  args.push_back(std::to_string(conv2d_attr->padding[2].as<IntImmNode>()->value));
+  args.push_back(std::to_string(conv2d_attr->padding[3].as<IntImmNode>()->value));
   args.push_back(std::to_string(wshape[2]));
   args.push_back(std::to_string(wshape[3]));
   args.push_back(std::to_string(conv2d_attr->strides[0].as<IntImmNode>()->value));
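Note: Relay's `conv2d` padding attribute, when given four values, is ordered (top, left, bottom, right), so the codegen above forwards `padding[0..3]` as `Ph0, Pw0, Ph1, Pw1`. A minimal sketch (not the codegen's actual API; values taken from the test added further down) of the argument list this produces:

```python
# Sketch of the argument list the updated codegen emits for the conv2d in
# the new test: wshape = (54, 1, 3, 3), padding = (1, 0, 1, 1),
# strides = (2, 2), groups = 1. Variable names are illustrative only.
wshape = (54, 1, 3, 3)
padding = (1, 0, 1, 1)  # Relay order: (top, left, bottom, right)
strides = (2, 2)
groups = 1

args = [
    wshape[0], groups,       # O, G
    padding[0], padding[1],  # Ph0, Pw0 (top, left)
    padding[2], padding[3],  # Ph1, Pw1 (bottom, right)
    wshape[2], wshape[3],    # Kh, Kw
    strides[0], strides[1],  # Sh, Sw
]
assert args == [54, 1, 1, 0, 1, 1, 3, 3, 2, 2]
```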
diff --git a/src/runtime/contrib/dnnl/dnnl.cc b/src/runtime/contrib/dnnl/dnnl.cc
index 5b9f5e17232c..19b3f796fd33 100644
--- a/src/runtime/contrib/dnnl/dnnl.cc
+++ b/src/runtime/contrib/dnnl/dnnl.cc
@@ -53,8 +53,9 @@ inline void read_from_dnnl_memory(void* handle, const memory& mem) {
 }
 
 void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, int p_N_, int p_C_,
-                        int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
-                        int p_Kw_, int p_Sh_, int p_Sw_, primitive_attr attr) {
+                        int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
+                        int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_,
+                        primitive_attr attr) {
   using tag = memory::format_tag;
   using dt = memory::data_type;
   engine eng(engine::kind::cpu, 0);
@@ -64,10 +65,11 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
   memory::dims conv2d_weights_tz = {p_O_, p_C_, p_Kh_, p_Kw_};
   if (p_G_ > 1) conv2d_weights_tz = {p_G_, 1, p_C_ / p_G_, p_Kh_, p_Kw_};
   memory::dims conv2d_bias_tz = {p_O_};
-  memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + 2 * p_Ph_ + p_Sh_) / p_Sh_,
-                                (p_W_ - p_Kw_ + 2 * p_Pw_ + p_Sw_) / p_Sw_};
+  memory::dims conv2d_dst_tz = {p_N_, p_O_, (p_H_ - p_Kh_ + p_Ph0_ + p_Ph1_ + p_Sh_) / p_Sh_,
+                                (p_W_ - p_Kw_ + p_Pw0_ + p_Pw1_ + p_Sw_) / p_Sw_};
   memory::dims conv2d_strides = {p_Sh_, p_Sw_};
-  memory::dims conv2d_padding = {p_Ph_, p_Pw_};
+  memory::dims conv2d_padding0 = {p_Ph0_, p_Pw0_};
+  memory::dims conv2d_padding1 = {p_Ph1_, p_Pw1_};
 
   auto user_src_memory = memory({{conv2d_src_tz}, dt::f32, tag::nchw}, eng, data);
   auto user_weights_memory =
@@ -81,7 +83,7 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
   auto conv2d_desc = convolution_forward::desc(
       prop_kind::forward_inference, algorithm::convolution_direct, conv2d_src_md, conv2d_weights_md,
-      conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding, conv2d_padding);
+      conv2d_bias_md, conv2d_dst_md, conv2d_strides, conv2d_padding0, conv2d_padding1);
 
   auto conv2d_prim_desc = convolution_forward::primitive_desc(conv2d_desc, attr, eng);
 
   auto conv2d_src_memory = user_src_memory;
@@ -98,12 +100,12 @@ void dnnl_conv2d_common(float* data, float* weights, float* bias, float* out, in
 }
 
 extern "C" void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_, int p_H_,
-                            int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_, int p_Kh_,
-                            int p_Kw_, int p_Sh_, int p_Sw_) {
+                            int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
+                            int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
   primitive_attr attr;
   std::vector<float> bias(p_O_, 0);
   return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
-                            p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
+                            p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, attr);
 }
 
 primitive_attr create_attr_with_relu_post_op() {
@@ -117,20 +119,23 @@ primitive_attr create_attr_with_relu_post_op() {
 }
 
 extern "C" void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_, int p_C_,
-                                       int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
-                                       int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_) {
+                                       int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_,
+                                       int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_,
+                                       int p_Sh_, int p_Sw_) {
   std::vector<float> bias(p_O_, 0);
   return dnnl_conv2d_common(data, weights, bias.data(), out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_,
-                            p_Ph_, p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
+                            p_Ph0_, p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
                             create_attr_with_relu_post_op());
 }
 
 extern "C" void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias, float* out,
                                             int p_N_, int p_C_, int p_H_, int p_W_, int p_O_,
-                                            int p_G_, int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
-                                            int p_Sh_, int p_Sw_) {
-  return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph_,
-                            p_Pw_, p_Kh_, p_Kw_, p_Sh_, p_Sw_, create_attr_with_relu_post_op());
+                                            int p_G_, int p_Ph0_, int p_Pw0_, int p_Ph1_,
+                                            int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
+                                            int p_Sw_) {
+  return dnnl_conv2d_common(data, weights, bias, out, p_N_, p_C_, p_H_, p_W_, p_O_, p_G_, p_Ph0_,
+                            p_Pw0_, p_Ph1_, p_Pw1_, p_Kh_, p_Kw_, p_Sh_, p_Sw_,
+                            create_attr_with_relu_post_op());
 }
 
 extern "C" void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_, int p_O_) {
diff --git a/src/runtime/contrib/dnnl/dnnl_kernel.h b/src/runtime/contrib/dnnl/dnnl_kernel.h
index dbc064a6bc99..f5f28fccd8e7 100644
--- a/src/runtime/contrib/dnnl/dnnl_kernel.h
+++ b/src/runtime/contrib/dnnl/dnnl_kernel.h
@@ -36,19 +36,20 @@ namespace contrib {
 using namespace dnnl;
 
 extern "C" TVM_DLL void dnnl_conv2d(float* data, float* weights, float* out, int p_N_, int p_C_,
-                                    int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph_, int p_Pw_,
-                                    int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);
+                                    int p_H_, int p_W_, int p_O_, int p_G_, int p_Ph0_, int p_Pw0_,
+                                    int p_Ph1_, int p_Pw1_, int p_Kh_, int p_Kw_, int p_Sh_,
+                                    int p_Sw_);
 
 extern "C" TVM_DLL void dnnl_fused_conv2d_relu(float* data, float* weights, float* out, int p_N_,
                                                int p_C_, int p_H_, int p_W_, int p_O_, int p_G_,
-                                               int p_Ph_, int p_Pw_, int p_Kh_, int p_Kw_,
-                                               int p_Sh_, int p_Sw_);
+                                               int p_Ph0_, int p_Pw0_, int p_Ph1_, int p_Pw1_,
+                                               int p_Kh_, int p_Kw_, int p_Sh_, int p_Sw_);
 
 extern "C" TVM_DLL void dnnl_fused_conv2d_bias_relu(float* data, float* weights, float* bias,
                                                     float* out, int p_N_, int p_C_, int p_H_,
-                                                    int p_W_, int p_O_, int p_G_, int p_Ph_,
-                                                    int p_Pw_, int p_Kh_, int p_Kw_, int p_Sh_,
-                                                    int p_Sw_);
+                                                    int p_W_, int p_O_, int p_G_, int p_Ph0_,
+                                                    int p_Pw0_, int p_Ph1_, int p_Pw1_, int p_Kh_,
+                                                    int p_Kw_, int p_Sh_, int p_Sw_);
 
 extern "C" TVM_DLL void dnnl_dense(float* data, float* weight, float* out, int p_B_, int p_I_,
                                    int p_O_);
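Note: in `dnnl_conv2d_common` the symmetric term `2 * p_Ph_` becomes `p_Ph0_ + p_Ph1_`, i.e. `out = (in - k + p0 + p1 + s) / s` with C integer division, which is equivalent to the usual `floor((in + p0 + p1 - k) / s) + 1`. A quick sanity check of this arithmetic against the shapes used in the new test:

```python
# Mirrors the updated destination-shape computation in dnnl_conv2d_common,
# using the shapes from test_extern_dnnl_padding below.
def out_dim(in_, k, p0, p1, s):
    # (in - k + p0 + p1 + s) / s with truncating integer division
    return (in_ - k + p0 + p1 + s) // s

H, W, Kh, Kw, Sh, Sw = 99, 12, 3, 3, 2, 2
Ph0, Ph1 = 1, 1  # top, bottom
Pw0, Pw1 = 0, 1  # left, right

assert out_dim(H, Kh, Ph0, Ph1, Sh) == 50
assert out_dim(W, Kw, Pw0, Pw1, Sw) == 6  # expected output shape: (1, 54, 50, 6)
```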
diff --git a/tests/python/relay/test_external_codegen.py b/tests/python/relay/test_external_codegen.py
index ad5f2aa9d4fa..41c113684f0a 100644
--- a/tests/python/relay/test_external_codegen.py
+++ b/tests/python/relay/test_external_codegen.py
@@ -213,6 +213,39 @@ def constant_updater(expr, symbol):
     tvm._ffi.registry.remove_global_func("relay.ext.ccompiler.constant_updater")
 
 
+@pytest.mark.skipif(
+    not tvm.get_global_func("relay.ext.dnnl", True),
+    reason="skip because DNNL codegen is not available",
+)
+@parametrize_external_json_codegen_checks
+def test_extern_dnnl_padding(check_result):
+    dtype = "float32"
+    ishape = (1, 1, 99, 12)
+    w1shape = (54, 1, 3, 3)
+    data0 = relay.var("data0", shape=(ishape), dtype=dtype)
+    weight0 = relay.var("weight0", shape=(w1shape), dtype=dtype)
+    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), strides=(2, 2), padding=(1, 0, 1, 1))
+    f = relay.Function([data0, weight0], out)
+    ref_mod = tvm.IRModule()
+    ref_mod["main"] = f
+
+    data1 = relay.var("data0", shape=(ishape), dtype=dtype)
+    weight1 = relay.var("weight0", shape=(w1shape), dtype=dtype)
+    f = set_external_func_attr(f, "dnnl", "dnnl_0")
+    call = relay.Call(f, [data1, weight1])
+    mod = tvm.IRModule.from_expr(call)
+
+    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
+    w_data = np.random.uniform(0, 1, w1shape).astype(dtype)
+
+    ref_res = relay.create_executor("graph", mod=ref_mod, device=tvm.cpu()).evaluate()(
+        i_data, w_data
+    )
+    check_result(
+        mod, {"data0": i_data, "weight0": w_data}, (1, 54, 50, 6), ref_res.numpy(), tol=1e-5
+    )
+
+
 @pytest.mark.skipif(
     not tvm.get_global_func("relay.ext.dnnl", True),
     reason="skip because DNNL codegen is not available",
diff --git a/tests/python/relay/utils/external_codegen.py b/tests/python/relay/utils/external_codegen.py
index 85583f6ccc5d..2d73ef85be28 100644
--- a/tests/python/relay/utils/external_codegen.py
+++ b/tests/python/relay/utils/external_codegen.py
@@ -59,7 +59,7 @@ def parametrize_external_json_codegen_checks(test):
 
 def update_lib(lib):
     test_dir = os.path.dirname(os.path.realpath(os.path.expanduser(__file__)))
-    source_dir = os.path.join(test_dir, "..", "..", "..")
+    source_dir = os.path.join(test_dir, "..", "..", "..", "..")
     contrib_path = os.path.join(source_dir, "src", "runtime", "contrib")
 
     kwargs = {}
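Note: `utils/external_codegen.py` sits four directory levels below the repository root (tests/python/relay/utils), so `update_lib` needs four ".." components, not three, to reach `src/runtime/contrib`. A small sketch of what the corrected join resolves to, assuming a hypothetical checkout path (the real code derives `test_dir` from `__file__`):

```python
import os

# Hypothetical checkout location; illustrative only.
test_dir = "/workspace/tvm/tests/python/relay/utils"
source_dir = os.path.normpath(os.path.join(test_dir, "..", "..", "..", ".."))
contrib_path = os.path.join(source_dir, "src", "runtime", "contrib")

print(source_dir)    # /workspace/tvm
print(contrib_path)  # /workspace/tvm/src/runtime/contrib
```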