[Autoscan] add 0D-tensor test for CPU #10214

Merged · 17 commits · Apr 23, 2023
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -74,6 +74,7 @@ lite_option(LITE_ON_TINY_PUBLISH "Publish tiny predictor lib."
lite_option(LITE_ON_MODEL_OPTIMIZE_TOOL "Build the model optimize tool" OFF)
lite_option(LITE_WITH_BENCHMARK_TEST "Build benchmark test cases" OFF)
lite_option(LITE_THREAD_POOL "Enable thread pool in lite" OFF)
lite_option(LITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS "Skip support_0_dim_tensor_pass" OFF)
# publish options
lite_option(LITE_BUILD_EXTRA "Enable extra algorithm support in Lite, both kernels and operators" OFF)
lite_option(LITE_BUILD_TAILOR "Enable tailoring library according to model" OFF)
4 changes: 4 additions & 0 deletions cmake/configure.cmake
@@ -263,3 +263,7 @@ if (EMSCRIPTEN)
add_compile_options("-pthread")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pthread")
endif()

if(LITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS)
add_definitions("-DLITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS")
endif()
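Assuming the usual CMake semantics of `lite_option`, the new behavior would be switched on at configure time with `-DLITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS=ON`, which makes the block above define the matching preprocessor symbol consumed in optimizer.cc below.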
2 changes: 2 additions & 0 deletions lite/core/optimizer/optimizer.cc
@@ -136,7 +136,9 @@ std::unique_ptr<RuntimeProgram> RunDefaultOptimizer(
std::vector<std::string> passes_local{
{"lite_quant_dequant_fuse_pass",
"weight_quantization_preprocess_pass",
#ifndef LITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS
"support_0_dim_tensor_pass",
#endif // LITE_SKIP_SUPPORT_0_DIM_TENSOR_PASS
"op_transformation_pass",
"assign_value_calc_offline_pass",
"ssd_boxes_calc_offline_pass",
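Note that the guard operates entirely at compile time: when the symbol is defined, "support_0_dim_tensor_pass" is simply absent from passes_local, so there is no runtime switch to toggle.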
95 changes: 95 additions & 0 deletions lite/kernels/host/bitwise_compute.cc
@@ -60,6 +60,15 @@ bool naive_not<bool>(bool a) {
return !a;
}

#define PROCESS_0D                                                    \
  if (param.X->dims().size() == 0 && param.Y->dims().size() == 0) {   \
    auto out_ptr = param.Out->template mutable_data<T>();             \
    auto x_ptr = param.X->template data<T>();                         \
    auto y_ptr = param.Y->template data<T>();                         \
    out_ptr[0] = AndFunc(x_ptr[0], y_ptr[0]);                         \
    return;                                                           \
  }

Collaborator: This is a bit of a trick. Could GenBatchElementWiseArg be changed directly instead?

Collaborator (Author): At first I also wanted to change GenBatchElementWiseArg here... but after making the change I found a memory error, so I'd like to bypass this part for now and fix it later. Would that be OK?
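For readers wondering what the macro-free route might look like, a minimal sketch follows. It is hypothetical (TryProcess0D is not part of this PR) and assumes only the param fields the macro above already uses:

    // Hypothetical sketch, not part of this PR: a template helper that could
    // replace the PROCESS_0D macro. It returns true when both inputs are 0-D
    // and the scalar result has already been written, so callers return early.
    template <typename T, typename BinaryFunc>
    bool TryProcess0D(const operators::BitwiseParam& param, BinaryFunc func) {
      if (param.X->dims().size() != 0 || param.Y->dims().size() != 0) {
        return false;  // not the 0-D/0-D case; take the broadcasting path
      }
      auto* out_ptr = param.Out->template mutable_data<T>();
      out_ptr[0] = func(param.X->template data<T>()[0],
                        param.Y->template data<T>()[0]);
      return true;
    }

Each Run() body would then begin with `if (TryProcess0D<T>(param, AndFunc)) return;` in place of `PROCESS_0D;`.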

template <typename T>
void BitwiseAndCompute<T>::Run() {
auto& param = this->template Param<param_t>();
@@ -68,6 +77,37 @@ void BitwiseAndCompute<T>::Run() {

// ElementwiseComputeEx can do broadcasting
std::function<T(T, T)> AndFunc = naive_and<T>;
PROCESS_0D;
auto batch_arg = lite::kernels::host::GenBatchElementWiseArg<T>(
param.X, param.Y, param.Out);
common_elmentwise_op_naive_cpu(batch_arg, AndFunc);
return;
}

template <typename T>
void BitwiseXorCompute<T>::Run() {
auto& param = this->template Param<param_t>();
CHECK(param.X);
CHECK(param.Y);

// ElementwiseComputeEx can do broadcasting
std::function<T(T, T)> AndFunc = naive_xor<T>;
PROCESS_0D;
auto batch_arg = lite::kernels::host::GenBatchElementWiseArg<T>(
param.X, param.Y, param.Out);
common_elmentwise_op_naive_cpu(batch_arg, AndFunc);
return;
}

template <typename T>
void BitwiseOrCompute<T>::Run() {
auto& param = this->template Param<param_t>();
CHECK(param.X);
CHECK(param.Y);

// ElementwiseComputeEx can do broadcasting
std::function<T(T, T)> AndFunc = naive_or<T>;
PROCESS_0D;
auto batch_arg = lite::kernels::host::GenBatchElementWiseArg<T>(
param.X, param.Y, param.Out);
common_elmentwise_op_naive_cpu(batch_arg, AndFunc);
@@ -88,6 +128,7 @@ void BitwiseNotCompute<T>::Run() {
return;
}

#undef PROCESS_0D
} // namespace host
} // namespace kernels
} // namespace lite
@@ -143,3 +184,57 @@ REGISTER_LITE_KERNEL(
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.Finalize();

#ifdef LITE_BUILD_EXTRA
using bitwise_xor_bool = paddle::lite::kernels::host::BitwiseXorCompute<bool>;
REGISTER_LITE_KERNEL(bitwise_xor, kHost, kAny, kNCHW, bitwise_xor_bool, bl)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.Finalize();

using bitwise_xor_int32_t =
paddle::lite::kernels::host::BitwiseXorCompute<int32_t>;
REGISTER_LITE_KERNEL(
bitwise_xor, kHost, kAny, kNCHW, bitwise_xor_int32_t, int32)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.Finalize();

using bitwise_xor_int64_t =
paddle::lite::kernels::host::BitwiseXorCompute<int64_t>;
REGISTER_LITE_KERNEL(
bitwise_xor, kHost, kAny, kNCHW, bitwise_xor_int64_t, int64)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.Finalize();

using bitwise_or_bool = paddle::lite::kernels::host::BitwiseOrCompute<bool>;
REGISTER_LITE_KERNEL(bitwise_or, kHost, kAny, kNCHW, bitwise_or_bool, bl)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kBool))})
.Finalize();

using bitwise_or_int32_t =
paddle::lite::kernels::host::BitwiseOrCompute<int32_t>;
REGISTER_LITE_KERNEL(bitwise_or, kHost, kAny, kNCHW, bitwise_or_int32_t, int32)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt32))})
.Finalize();

using bitwise_or_int64_t =
paddle::lite::kernels::host::BitwiseOrCompute<int64_t>;
REGISTER_LITE_KERNEL(bitwise_or, kHost, kAny, kNCHW, bitwise_or_int64_t, int64)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kHost), PRECISION(kInt64))})
.Finalize();
#endif // LITE_BUILD_EXTRA
34 changes: 34 additions & 0 deletions lite/kernels/host/bitwise_compute.h
@@ -43,6 +43,40 @@ class BitwiseAndCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
#endif
};

template <typename T>
class BitwiseXorCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
using param_t = operators::BitwiseParam;

void Run() override;

virtual ~BitwiseXorCompute() = default;
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForBitXor"};
#endif
};

template <typename T>
class BitwiseOrCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
using param_t = operators::BitwiseParam;

void Run() override;

virtual ~BitwiseOrCompute() = default;
#ifdef LITE_WITH_PROFILE
virtual void SetProfileRuntimeKernelInfo(
paddle::lite::profile::OpCharacter* ch) {
ch->kernel_func_name = kernel_func_name_;
}
std::string kernel_func_name_{"NotImplForBitOr"};
#endif
};

template <typename T>
class BitwiseNotCompute : public KernelLite<TARGET(kHost), PRECISION(kAny)> {
public:
8 changes: 4 additions & 4 deletions lite/kernels/host/gaussian_random_compute.cc
@@ -55,7 +55,10 @@ void GaussRandomCompute::Run() {
float gstd = param.gauss_std;

// output shape
if (param.ShapeTensor != nullptr) {
if (param.shape.size() > 0) {
DDimLite dims(param.shape);
param.Out->Resize(dims);
} else if (param.ShapeTensor != nullptr) {
std::vector<int64_t> tmp{};
auto ptr = param.ShapeTensor->data<int>();
for (int i = 0; i < param.ShapeTensor->numel(); i++) {
@@ -72,9 +75,6 @@ }
}
DDimLite dims(tmp);
param.Out->Resize(dims);
} else {
DDimLite dims(param.shape);
param.Out->Resize(dims);
}
auto data = param.Out->mutable_data<float>();
int size = param.Out->numel();
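The hunk above promotes the `shape` attribute to highest priority, ahead of ShapeTensor (and, in the elided middle of the function, presumably ShapeTensorList), and leaves Out untouched when none is given so the output can stay 0-D. A standalone sketch of that precedence, with illustrative types rather than the Lite API:

    #include <cstdint>
    #include <vector>

    // Hypothetical mirror of the resolution order; not the Lite implementation.
    std::vector<int64_t> ResolveOutShape(
        const std::vector<int64_t>& shape_attr,
        const std::vector<int64_t>* shape_tensor,  // nullptr when absent
        const std::vector<std::vector<int64_t>>& shape_tensor_list) {
      if (!shape_attr.empty()) return shape_attr;         // 1. shape attribute
      if (shape_tensor != nullptr) return *shape_tensor;  // 2. ShapeTensor
      std::vector<int64_t> dims;
      for (const auto& t : shape_tensor_list) {           // 3. ShapeTensorList
        dims.push_back(t.empty() ? 1 : t.front());
      }
      return dims;  // nothing set anywhere -> rank-0 (0-D) output
    }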
3 changes: 1 addition & 2 deletions lite/operators/empty_op.cc
@@ -40,8 +40,7 @@ bool EmptyOp::InferShapeImpl() const {
} else if (!param_.shape.empty()) {
OutShape = param_.shape;
} else {
LOG(FATAL) << "no valid out_shape. Must set one of shape_tensor, or "
"shape_tensor_list, or shape.";
LOG(WARNING) << "EmptyOp output is 0D-tensor.";
}

param_.Out->Resize(OutShape);
3 changes: 1 addition & 2 deletions lite/operators/fill_constant_op.cc
@@ -41,8 +41,7 @@ bool FillConstantOp::InferShapeImpl() const {
} else if (!param_.shape.empty()) {
out_shape = param_.shape;
} else {
LOG(FATAL) << "no valid out_shape. Must set one of shape_tensor, or "
"shape_tensor_list, or shape.";
LOG(WARNING) << "FillConstant is 0D-tensor output";
}

param_.out->Resize(out_shape);
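Both this change and the EmptyOp one above rest on the same convention: an empty dims list denotes a rank-0 tensor that still stores one element, because the product over an empty list is 1. A minimal self-contained illustration (not Lite code):

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <numeric>
    #include <vector>

    int64_t Numel(const std::vector<int64_t>& dims) {
      // Empty-product convention: a 0-D (scalar) tensor has exactly one element.
      return std::accumulate(dims.begin(), dims.end(), int64_t{1},
                             std::multiplies<int64_t>());
    }

    int main() {
      assert(Numel({}) == 1);       // 0-D: rank 0, still one element
      assert(Numel({3, 4}) == 12);  // ordinary 2-D case
      return 0;
    }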
8 changes: 1 addition & 7 deletions lite/operators/gaussian_random_op.cc
@@ -23,13 +23,7 @@ namespace paddle {
namespace lite {
namespace operators {

bool GaussRandomOp::CheckShape() const {
if (param_.ShapeTensor == nullptr && param_.ShapeTensorList.empty()) {
CHECK(param_.shape.size() > 0)
<< "Attribute(shape) of GaussRandomOp must be set and shape.size() > 0";
}
return true;
}
bool GaussRandomOp::CheckShape() const { return true; }

bool GaussRandomOp::InferShapeImpl() const {
auto shape = param_.shape;